framework,version,device,op_name,kernel_source,gemm_dtype,m,n,k,latency
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,10240,12.64310359954834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,10240,13.521568298339844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,12288,14.436287879943848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,12288,16.116256713867188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,16384,21.3919677734375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,16384,23.058143615722656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,8192,9.86736011505127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,8192,9.749631881713867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,12288,12.507488250732422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,7168,8.470144271850586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,7168,8.579615592956543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,6144,7.189792156219482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,6144,7.4720001220703125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,16384,17.056320190429688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,8192,8.501376152038574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,10240,10.5066556930542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,5120,6.053408145904541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,4096,5.042816162109375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,4096,4.933887958526611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,5120,6.203264236450195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,7168,7.5977277755737305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,3584,4.372064113616943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,6144,6.592095851898193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,4096,4.526912212371826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,3584,4.496767997741699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,3072,3.9065279960632324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,3072,3.7989439964294434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,5120,5.532447814941406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,2560,3.251487970352173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,2560,3.303136110305786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,2048,2.6662399768829346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,2048,2.73091197013855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,3584,4.025407791137695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,1536,2.10809588432312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,3072,3.5207359790802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,1024,1.5251840353012085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,1024,1.5497280359268188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,1536,2.1231679916381836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,2560,3.0245120525360107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,2048,2.5234880447387695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,768,1.2301119565963745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,768,1.266975998878479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,512,0.939520001411438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,256,0.49830400943756104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,512,0.9584320187568665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,256,0.49907198548316956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,1536,2.0418241024017334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,128,0.3839679956436157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,128,0.3871999979019165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,1024,1.544319987297058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,768,1.2776000499725342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,64,0.38201600313186646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,64,0.3853119909763336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,256,0.7641919851303101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,512,1.0260800123214722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,32,0.38121598958969116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,32,0.3826879858970642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,128,0.590719997882843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,64,0.5075520277023315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,32,0.513696014881134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,12288,4.272543907165527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,12288,3.6361279487609863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,16384,4.882175922393799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,16384,5.605247974395752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,10240,4.145472049713135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,10240,3.0255041122436523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,16384,4.217951774597168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,8192,3.1148159503936768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,12288,3.201632022857666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,10240,2.7149760723114014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,8192,2.4448320865631104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,8192,2.1790080070495605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,7168,2.166815996170044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,7168,2.3338561058044434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,6144,1.8277440071105957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,7168,1.947424054145813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,5120,1.7013440132141113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,6144,1.878335952758789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,5120,1.5837759971618652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,4096,1.2921279668807983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,4096,1.2702399492263794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,6144,1.6764479875564575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,4096,1.161952018737793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,5120,1.4313280582427979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,3584,1.1443519592285156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,3584,1.1242560148239136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,3072,0.9699839949607849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,3584,1.03603196144104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,3072,1.0030080080032349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,2560,0.8347839713096619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,2560,0.8501120209693909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,2048,0.6924160122871399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,3072,0.9054399728775024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,2048,0.7008960247039795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,2560,0.7773439884185791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,1536,0.5422080159187317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,1536,0.55103999376297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,65536,22.741600036621094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,1536,0.5230720043182373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,1024,0.4070720076560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,1024,0.3954240083694458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,2048,0.6508479714393616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,768,0.32207998633384705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,1024,0.3925440013408661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,768,0.3304319977760315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,512,0.24729600548744202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,256,0.1356160044670105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,768,0.33238399028778076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,256,0.1308480054140091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,512,0.2622080147266388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,128,0.10265599936246872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,256,0.19599999487400055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,128,0.10409600287675858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,512,0.25494399666786194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,64,0.09225600212812424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,128,0.15302400290966034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,64,0.09417600184679031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,32,0.10047999769449234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,32,0.1034879982471466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,64,0.1329919993877411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,32,0.13331200182437897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,65536,22.575231552124023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,12288,3.655872106552124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,16384,4.04099178314209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,16384,5.216671943664551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,12288,2.748703956604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,16384,3.1393918991088867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,10240,2.9220800399780273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,12288,2.404416084289551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,10240,2.2902719974517822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,8192,1.8353919982910156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,10240,2.007040023803711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,8192,1.848479986190796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,7168,1.6213120222091675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,8192,1.6367679834365845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,6144,1.3991039991378784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,7168,1.616096019744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,6144,1.410207986831665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,7168,1.4505280256271362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,5120,1.168287992477417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,65536,19.410655975341797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,5120,1.1964800357818604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,6144,1.2700159549713135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,4096,0.952127993106842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,4096,0.9773759841918945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,65536,17.096160888671875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,5120,1.0677759647369385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,3584,0.8532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,3584,0.8674560189247131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,65536,16.774015426635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,3072,0.7422720193862915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,4096,0.8721280097961426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,3072,0.7553920149803162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,2560,0.6284480094909668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,3584,0.7815999984741211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,2048,0.5332480072975159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,2048,0.520799994468689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,3072,0.6872959733009338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,2560,0.643455982208252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,1536,0.4123840034008026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,1024,0.3022080063819885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,2048,0.4875200092792511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,2560,0.585856020450592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,1024,0.3099200129508972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,1536,0.4222719967365265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,1536,0.39504000544548035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,512,0.18937599658966064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,512,0.19622400403022766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,768,0.24934400618076324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,768,0.25516799092292786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,1024,0.2983039915561676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,768,0.25094398856163025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,256,0.10470400005578995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,128,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,256,0.10406400263309479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,128,0.08083199709653854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,256,0.1502079963684082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,512,0.20003199577331543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,64,0.07785599678754807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,64,0.07273600250482559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,32,0.07939200103282928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,128,0.11667200177907944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,64,0.1016639992594719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,32,0.07932800054550171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,32,0.1019200012087822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,12288,2.236639976501465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,16384,3.3691840171813965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,16384,2.921504020690918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,12288,2.29532790184021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,16384,2.777791976928711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,10240,1.8560960292816162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,12288,2.0039360523223877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,10240,1.9157119989395142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,8192,1.8839360475540161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,65536,14.784319877624512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,10240,1.7001279592514038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,8192,1.5429439544677734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,7168,1.7014399766921997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,7168,1.3638399839401245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,8192,1.3650879859924316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,6144,1.215872049331665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,6144,1.1904959678649902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,7168,1.2200000286102295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,5120,1.0045119524002075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,6144,1.0505919456481934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,4096,0.8005759716033936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,5120,0.8977919816970825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,5120,0.9901120066642761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,4096,0.8183040022850037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,3584,0.7100480198860168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,65536,14.575615882873535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,3584,0.7228479981422424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,3072,0.6168000102043152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,4096,0.7287039756774902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,3584,0.6519680023193359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,3072,0.6346880197525024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,65536,14.716575622558594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,2560,0.5291839838027954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,2560,0.5392959713935852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,3072,0.566752016544342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,2048,0.4360960125923157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,2048,0.4497919976711273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,1536,0.342848002910614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,2560,0.4901440143585205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,1536,0.351936012506485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,1024,0.2531200051307678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,1024,0.2624639868736267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,2048,0.4097279906272888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,768,0.2168319970369339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,768,0.208639994263649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,1536,0.3306879997253418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,1024,0.2500799894332886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,512,0.16105599701404572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,512,0.16451199352741241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,256,0.08848000317811966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,256,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,768,0.2088319957256317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,512,0.1679680049419403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,128,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,128,0.06844799965620041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,256,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,64,0.0628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,64,0.06415999680757523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,128,0.09859199821949005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,32,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,32,0.06880000233650208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,64,0.08684799820184708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,32,0.0867839977145195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,12288,2.079967975616455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,12288,1.8723200559616089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,16384,2.4622080326080322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,16384,2.4371519088745117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,16384,2.1111679077148438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,10240,1.5746879577636719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,10240,1.5320639610290527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,12288,1.616320013999939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,8192,1.2583359479904175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,8192,1.2463040351867676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,65536,12.200127601623535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,10240,1.3504639863967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,7168,1.1255680322647095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,7168,1.092128038406372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,8192,1.101088047027588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,6144,0.9477440118789673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,6144,0.9770240187644958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,7168,0.9685440063476562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,5120,0.7951040267944336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,6144,0.8407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,5120,0.8144959807395935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,4096,0.650592029094696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,4096,0.6692479848861694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,5120,0.7199360132217407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,3584,0.5787839889526367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,3072,0.5003200173377991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,4096,0.5834559798240662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,65536,12.167327880859375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,3584,0.5899199843406677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,3072,0.5144320130348206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,2560,0.4270080029964447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,3584,0.5231680274009705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,65536,11.63856029510498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,2560,0.4371519982814789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,3072,0.46160000562667847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,2048,0.35225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,2560,0.3932799994945526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,2048,0.36214399337768555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,1536,0.2818880081176758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,1536,0.2789120078086853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,1024,0.20396800339221954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,2048,0.328575998544693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,1024,0.2078399956226349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,1536,0.2669120132923126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,768,0.16566400229930878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,768,0.16937600076198578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,1024,0.19996799528598785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,512,0.1303039938211441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,512,0.13251200318336487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,768,0.16713599860668182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,256,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,256,0.06998399645090103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,512,0.13424000144004822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,128,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,128,0.056832000613212585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,64,0.05161599814891815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,256,0.10185600072145462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,128,0.07996799796819687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,64,0.053599998354911804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,32,0.056063998490571976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,64,0.07081600278615952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,32,0.07072000205516815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,32,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,12288,1.6110080480575562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,16384,2.15116810798645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,16384,2.21891188621521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,16384,1.833631992340088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,10240,1.3503680229187012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,12288,1.6463040113449097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,12288,1.4162240028381348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,65536,9.993375778198242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,10240,1.3660800457000732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,8192,1.086016058921814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,8192,1.1053440570831299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,10240,1.1924480199813843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,7168,0.9807999730110168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,7168,0.9546239972114563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,8192,0.9584320187568665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,6144,0.8284159898757935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,6144,0.8500480055809021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,6144,0.732479989528656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,5120,0.7000640034675598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,7168,0.8489919900894165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,5120,0.7168319821357727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,4096,0.5708159804344177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,5120,0.624671995639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,4096,0.5851200222969055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,65536,10.495327949523926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,3584,0.5047039985656738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,4096,0.5166400074958801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,3584,0.5201600193977356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,3072,0.4528000056743622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,3072,0.4419519901275635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,65536,10.616703987121582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,3584,0.4583359956741333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,2560,0.3761279881000519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,3072,0.40217599272727966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,2560,0.38553598523139954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,2048,0.31068798899650574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,2048,0.31913599371910095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,1536,0.24383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,2560,0.3449920117855072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,1536,0.2512640058994293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,1024,0.179967999458313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,1024,0.18428799510002136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,2048,0.2879680097103119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,1536,0.23206399381160736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,768,0.14742399752140045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,768,0.15279999375343323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,512,0.11481600254774094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,1024,0.17664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,512,0.11849600076675415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,768,0.14800000190734863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,256,0.06505600363016129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,256,0.0652799978852272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,512,0.11872000247240067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,128,0.05097600072622299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,128,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,256,0.09001599997282028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,64,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,128,0.07107199728488922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,64,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,32,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,32,0.05190400034189224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,64,0.06278400123119354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,32,0.06332799792289734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,12288,1.3755199909210205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,16384,1.8503040075302124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,16384,2.3503360748291016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,12288,1.4094079732894897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,16384,1.5723520517349243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,10240,1.2670400142669678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,65536,8.701984405517578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,12288,1.2049280405044556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,10240,1.1743359565734863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,8192,0.9327999949455261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,8192,0.9664639830589294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,10240,1.0096960067749023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,7168,0.8150720000267029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,7168,0.8418239951133728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,8192,0.8207039833068848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,6144,0.7164480090141296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,5120,0.598143994808197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,6144,0.7298880219459534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,7168,0.7344319820404053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,5120,0.6161280274391174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,6144,0.6324800252914429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,4096,0.4890559911727905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,4096,0.5052480101585388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,5120,0.5366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,3584,0.44284799695014954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,3584,0.4431999921798706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,65536,8.902048110961914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,4096,0.4362240135669708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,3072,0.3787199854850769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,65536,8.729503631591797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,3072,0.3912320137023926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,3584,0.3964160084724426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,2048,0.2662400007247925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,2560,0.3275519907474518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,1536,0.21011200547218323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,3072,0.34518399834632874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,2048,0.27606400847435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,2560,0.32256001234054565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,2560,0.29635199904441833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,1536,0.21491199731826782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,2048,0.24966399371623993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,1024,0.15568000078201294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,1024,0.160288006067276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,768,0.1271039992570877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,1536,0.20095999538898468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,1024,0.1515520066022873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,768,0.1308159977197647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,512,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,512,0.10252799838781357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,768,0.12835200130939484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,256,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,256,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,512,0.10204800218343735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,256,0.07772800326347351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,128,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,128,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,64,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,128,0.062111999839544296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,64,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,64,0.05459199845790863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,32,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,32,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,32,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,12288,1.1654399633407593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,16384,1.5329279899597168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,16384,1.5073280334472656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,10240,0.9621760249137878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,12288,1.1767679452896118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,16384,1.3225280046463013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,10240,0.9810240268707275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,65536,7.439072132110596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,8192,0.7712000012397766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,12288,0.9978880286216736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,7168,0.693343997001648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,8192,0.8013759851455688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,10240,0.8465920090675354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,7168,0.709663987159729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,8192,0.6865919828414917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,6144,0.5960320234298706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,6144,0.6098880171775818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,7168,0.6080639958381653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,5120,0.5036479830741882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,6144,0.5256959795951843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,5120,0.5146880149841309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,4096,0.4132480025291443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,4096,0.4220159947872162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,5120,0.4485760033130646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,3584,0.36374399065971375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,3584,0.37462401390075684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,65536,7.704383850097656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,4096,0.36508798599243164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,3072,0.31679999828338623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,65536,7.284160137176514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,3072,0.3258560001850128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,3584,0.3296320140361786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,2560,0.271232008934021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,2560,0.27423998713493347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,2048,0.2306559979915619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,3072,0.28863999247550964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,2048,0.2242559939622879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,1536,0.17750400304794312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,2560,0.24854399263858795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,1536,0.1809920072555542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,2048,0.20793600380420685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,1024,0.13100799918174744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,1024,0.1345919966697693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,768,0.10793600231409073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,1536,0.16764800250530243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,768,0.10982400178909302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,1024,0.12800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,768,0.10713600367307663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,512,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,512,0.0867839977145195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,256,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,256,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,512,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,128,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,128,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,256,0.06611199676990509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,64,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,128,0.052319999784231186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,64,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,64,0.04732799902558327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,32,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,32,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,32,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,16384,1.236448049545288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,16384,1.4480960369110107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,12288,0.9275839924812317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,10240,0.7795519828796387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,16384,1.061568021774292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,12288,0.944703996181488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,65536,6.217696189880371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,10240,0.7906879782676697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,12288,0.8110399842262268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,8192,0.6230080127716064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,8192,0.639743983745575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,10240,0.6776319742202759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,7168,0.5718079805374146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,7168,0.5666559934616089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,8192,0.553056001663208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,6144,0.47651201486587524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,6144,0.4907839894294739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,7168,0.48841598629951477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,5120,0.4076479971408844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,5120,0.4132480025291443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,6144,0.4237760007381439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,4096,0.3314880132675171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,65536,6.149184226989746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,3584,0.2938239872455597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,5120,0.3599039912223816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,4096,0.3410879969596863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,65536,5.763232231140137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,3584,0.30243200063705444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,3072,0.25600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,4096,0.29468798637390137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,3072,0.262688010931015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,2560,0.21545599400997162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,3584,0.26393601298332214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,2560,0.2237119972705841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,3072,0.23308800160884857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,2048,0.1810240000486374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,2048,0.18742400407791138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,2560,0.19952000677585602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,1536,0.146464005112648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,1536,0.14284799993038177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,1024,0.10627199709415436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,2048,0.16678400337696075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,1536,0.13606399297714233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,1024,0.10812799632549286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,768,0.08780799806118011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,768,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,512,0.06844799965620041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,512,0.07062400132417679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,1024,0.10236799716949463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,768,0.08640000224113464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,512,0.06988800317049026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,256,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,256,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,128,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,128,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,256,0.053279999643564224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,64,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,128,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,64,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,64,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,32,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,32,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,32,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,12288,0.7987200021743774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,12288,0.8284159898757935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,16384,1.0899840593338013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,16384,1.0568000078201294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,65536,5.127776145935059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,16384,0.926751971244812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,10240,0.6776959896087646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,10240,0.6875519752502441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,12288,0.7050880193710327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,8192,0.5472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,8192,0.5654399991035461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,7168,0.4781759977340698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,10240,0.5951359868049622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,6144,0.4143359959125519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,8192,0.48124799132347107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,7168,0.4970879852771759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,6144,0.4254400134086609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,5120,0.3500480055809021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,7168,0.43059200048446655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,5120,0.3616639971733093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,6144,0.37248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,4096,0.28835201263427734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,5120,0.315744012594223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,4096,0.295199990272522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,3584,0.25782400369644165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,4096,0.25900799036026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,65536,5.17033576965332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,3072,0.22355200350284576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,3584,0.26281601190567017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,65536,5.055263996124268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,3072,0.22991999983787537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,3584,0.23238399624824524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,2560,0.1921280026435852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,3072,0.20390400290489197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,2560,0.19593599438667297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,2048,0.15865600109100342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,2048,0.1637759953737259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,2560,0.1754239946603775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,1536,0.1271039992570877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,2048,0.14815999567508698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,1536,0.12963199615478516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,1536,0.12003199756145477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,1024,0.09478399902582169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,1024,0.09612800180912018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,768,0.07779199630022049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,1024,0.09171199798583984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,512,0.06140799820423126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,768,0.07667200267314911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,512,0.0634239986538887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,512,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,256,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,128,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,256,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,128,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,256,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,128,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,64,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,768,0.07919999957084656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,64,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,32,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,32,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,64,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,32,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,12288,0.6920639872550964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,16384,0.9354239702224731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,16384,0.9107199907302856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,65536,4.354112148284912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,12288,0.7083200216293335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,16384,0.7954559922218323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,10240,0.5774719715118408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,10240,0.5904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,12288,0.608896017074585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,8192,0.46700799465179443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,8192,0.4829440116882324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,10240,0.5103039741516113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,7168,0.4147520065307617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,7168,0.4248639941215515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,8192,0.4143359959125519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,6144,0.3598400056362152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,7168,0.3684160113334656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,6144,0.3689599931240082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,5120,0.30502399802207947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,4096,0.24803200364112854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,6144,0.3195840120315552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,5120,0.3123840093612671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,65536,4.480480194091797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,4096,0.2528960108757019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,5120,0.27139198780059814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,65536,4.375711917877197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,3584,0.2223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,4096,0.22460800409317017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,3584,0.22515200078487396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,3072,0.1915840059518814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,3072,0.1974720060825348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,2560,0.16415999829769135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,2560,0.1685120016336441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,3584,0.1998720020055771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,3072,0.17535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,2560,0.1512639969587326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,2048,0.13683199882507324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,2048,0.1408960074186325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,1536,0.11168000102043152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,1536,0.10992000252008438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,2048,0.12723200023174286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,1024,0.08182399719953537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,1536,0.10294400155544281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,1024,0.08390399813652039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,768,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,768,0.06784000247716904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,1024,0.07820799946784973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,512,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,768,0.0663359984755516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,512,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,256,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,512,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,256,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,256,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,128,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,128,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,64,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,64,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,128,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,64,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,32,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,32,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,32,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,12288,0.578719973564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,16384,0.756991982460022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,65536,3.750688076019287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,65536,3.880352020263672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,16384,0.7773759961128235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,16384,0.6656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,12288,0.5923839807510376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,10240,0.4965440034866333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,10240,0.4854080080986023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,10240,0.427839994430542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,8192,0.3903360068798065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,8192,0.40406399965286255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,12288,0.505952000617981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,7168,0.34751999378204346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,8192,0.34969601035118103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,7168,0.35628798604011536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,6144,0.29814401268959045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,7168,0.3073920011520386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,5120,0.255295991897583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,6144,0.26924800872802734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,5120,0.26182401180267334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,6144,0.3087039887905121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,4096,0.2075520008802414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,4096,0.21318399906158447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,3584,0.1863040030002594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,5120,0.2277120053768158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,4096,0.18915200233459473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,65536,3.731935977935791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,3584,0.18966400623321533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,3072,0.1624000072479248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,3072,0.16598400473594666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,3584,0.16787199676036835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,2560,0.1388159990310669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,2560,0.14124800264835358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,3072,0.14713600277900696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,2048,0.11612799763679504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,2048,0.11894399672746658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,2560,0.12800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,1536,0.09299200028181076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,2048,0.1069440022110939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,1536,0.09481599926948547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,1024,0.07030399888753891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,1536,0.08649600297212601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,1024,0.0719359964132309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,768,0.05830400064587593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,1024,0.0663359984755516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,768,0.05955199897289276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,768,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,512,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,512,0.04732799902558327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,256,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,512,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,256,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,128,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,256,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,128,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,128,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,64,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,64,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,64,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,32,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,32,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,32,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,12288,0.464352011680603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,65536,3.1760001182556152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,16384,0.6069440245628357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,16384,0.6218559741973877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,16384,0.5318080186843872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,12288,0.4744639992713928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,10240,0.38703998923301697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,10240,0.397599995136261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,12288,0.40486401319503784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,8192,0.3161279857158661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,8192,0.3221760094165802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,7168,0.27852800488471985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,10240,0.34220799803733826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,8192,0.2776640057563782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,7168,0.2858560085296631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,6144,0.24054400622844696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,6144,0.24822400510311127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,5120,0.2056639939546585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,7168,0.24582399427890778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,6144,0.2144639939069748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,4096,0.16790400445461273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,5120,0.20953600108623505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,65536,3.0511679649353027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,65536,2.918720006942749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,4096,0.17174400389194489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,5120,0.18214400112628937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,3584,0.15007999539375305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,3584,0.15321600437164307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,4096,0.1502400040626526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,3072,0.13036799430847168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,3072,0.13446399569511414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,3584,0.1361279934644699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,3072,0.1186240017414093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,2560,0.11311999708414078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,2560,0.11459200084209442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,2048,0.09411200135946274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,2560,0.10268799960613251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,2048,0.09673599898815155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,1536,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,2048,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,1536,0.07727999985218048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,1024,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,1536,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,1024,0.05878400057554245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,1024,0.05363199859857559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,768,0.04819199815392494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,768,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,768,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,512,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,512,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,512,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,256,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,256,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,256,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,128,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,128,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,128,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,64,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,64,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,32,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,32,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,32,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,65536,2.4904959201812744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,64,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,12288,0.3500800132751465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,16384,0.477728009223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,16384,0.4912320077419281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,12288,0.37539198994636536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,16384,0.4005120098590851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,10240,0.2945919930934906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,10240,0.2999039888381958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,12288,0.3056640028953552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,8192,0.23785600066184998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,10240,0.25974398851394653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,8192,0.24553599953651428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,7168,0.21059200167655945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,8192,0.20953600108623505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,7168,0.21529600024223328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,6144,0.18451200425624847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,7168,0.18831999599933624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,6144,0.18783999979496002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,5120,0.15571199357509613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,65536,2.2704639434814453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,65536,2.365760087966919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,5120,0.15993599593639374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,6144,0.1624639928340912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,4096,0.12915199995040894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,5120,0.13993600010871887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,4096,0.13091200590133667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,3072,0.10099200159311295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,3584,0.11452800035476685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,3584,0.11791999638080597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,4096,0.11446399986743927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,3584,0.10249599814414978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,3072,0.10326399654150009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,3072,0.09094399958848953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,2048,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,2560,0.08720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,2560,0.08844800293445587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,2048,0.07260800153017044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,2560,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,2048,0.066880002617836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,1024,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,1536,0.05686400085687637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,1536,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,1024,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,1536,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,768,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,1024,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,768,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,512,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,512,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,768,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,256,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,256,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,512,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,65536,1.873471975326538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,256,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,128,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,128,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,64,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,128,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,64,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,64,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,32,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,32,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,32,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,12288,0.23712000250816345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,12288,0.24217599630355835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,16384,0.30828800797462463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,16384,0.3176960051059723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,16384,0.26870399713516235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,10240,0.19779199361801147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,12288,0.20588800311088562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,10240,0.2032639980316162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,10240,0.17404800653457642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,8192,0.16182400286197662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,8192,0.16604800522327423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,7168,0.14374400675296783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,8192,0.1416960060596466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,7168,0.14860799908638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,7168,0.12591999769210815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,6144,0.12611199915409088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,6144,0.12809599936008453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,5120,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,6144,0.11007999628782272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,65536,1.5543040037155151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,65536,1.5701119899749756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,5120,0.1093439981341362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,4096,0.0888959988951683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,5120,0.09443199634552002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,4096,0.09027200192213058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,3584,0.07964800298213959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,4096,0.07849600166082382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,3584,0.08169600367546082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,3072,0.07001599669456482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,3584,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,2560,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,3072,0.062111999839544296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,2560,0.06124800071120262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,2048,0.052032001316547394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,2560,0.053599998354911804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,2048,0.052191998809576035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,65536,1.3020800352096558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,2048,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,1536,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,1536,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,1024,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,1536,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,1024,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,3072,0.07321599870920181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,768,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,1024,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,768,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,768,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,512,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,512,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,512,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,256,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,256,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,128,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,256,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,128,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,128,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,64,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,64,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,32,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,64,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,32,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,32,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,12288,0.18636800348758698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,12288,0.18931199610233307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,16384,0.2391359955072403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,16384,0.24403199553489685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,16384,0.2714560031890869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,10240,0.15667200088500977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,12288,0.2062080055475235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,10240,0.16316799819469452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,8192,0.12931199371814728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,10240,0.17417599260807037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,8192,0.13257600367069244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,7168,0.11369600147008896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,8192,0.14159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,7168,0.1181119978427887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,7168,0.12636800110340118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,6144,0.09980800002813339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,6144,0.10204800218343735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,65536,1.137727975845337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,65536,1.1312639713287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,6144,0.11023999750614166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,5120,0.08537600189447403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,5120,0.08633600175380707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,5120,0.09382399916648865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,4096,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,4096,0.07251200079917908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,3584,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,3584,0.06492800265550613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,4096,0.07673600316047668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,3584,0.06988800317049026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,3072,0.0551999993622303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,3072,0.05612799897789955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,2560,0.058240000158548355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,3072,0.06095999851822853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,2560,0.04912000149488449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,2048,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,2560,0.052480001002550125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,65536,1.24835205078125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,2048,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,1536,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,1024,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,2048,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,1536,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,1536,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,1024,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,1024,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,768,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,768,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,512,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,768,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,512,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,256,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,256,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,512,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,128,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,128,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,256,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,128,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,64,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,64,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,32,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,32,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,12288,0.12243200093507767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,16384,0.1615999937057495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,16384,0.16368000209331512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,16384,0.13760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,12288,0.10659199953079224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,10240,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,10240,0.10745599865913391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,12288,0.12515200674533844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,10240,0.09008000046014786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,8192,0.08582399785518646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,7168,0.07756800204515457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,8192,0.08796799927949905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,8192,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,7168,0.07955200225114822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,65536,0.8325120210647583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,65536,0.8064960241317749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,6144,0.06867200136184692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,7168,0.06707199662923813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,6144,0.06896000355482101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,6144,0.059007998555898666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,4096,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,5120,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,5120,0.04966399818658829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,5120,0.05907199904322624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,4096,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,4096,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,3584,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,3584,0.04531199857592583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,3072,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,3584,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,65536,0.6365439891815186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,3072,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,3072,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,2560,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,2560,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,2048,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,2048,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,2560,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,2048,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,1536,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,1536,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,1536,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,1024,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,1024,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,1024,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,768,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,768,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,768,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,512,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,512,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,512,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,256,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,256,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,128,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,128,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,128,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,32,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,64,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,32,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,16384,0.09801600128412247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,12288,0.07648000121116638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,12288,0.07788799703121185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,16384,0.09929600358009338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,16384,0.0987199991941452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,12288,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,10240,0.06592000275850296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,10240,0.06659200042486191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,8192,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,10240,0.06588800251483917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,8192,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,7168,0.04864000156521797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,8192,0.05299200117588043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,7168,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,7168,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,65536,0.38633599877357483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,65536,0.404992014169693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,6144,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,6144,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,6144,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,5120,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,5120,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,65536,0.45283201336860657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,4096,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,5120,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,4096,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,4096,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,3584,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,3584,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,3584,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,3072,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,3072,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,2560,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,3072,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,2560,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,2560,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,2048,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,2048,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,2048,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,1536,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,1536,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,1536,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,1024,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,1024,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,768,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,256,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,128,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,32,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,64,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,32,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,12288,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,16384,0.09612800180912018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,12288,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,16384,0.07315199822187424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,16384,0.058111999183893204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,12288,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,10240,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,10240,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,8192,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,10240,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,8192,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,8192,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,7168,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,7168,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,65536,0.30460798740386963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,65536,0.3118079900741577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,6144,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,6144,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,65536,0.2483839988708496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,5120,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,6144,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,5120,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,5120,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,4096,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,4096,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,3584,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,3584,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,3072,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,3072,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,2560,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,2048,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,2560,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,2048,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,1536,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,1536,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,1024,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,1024,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,512,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,128,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,64,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,32,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,12288,0.058720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,12288,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,16384,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,16384,0.07228799909353256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,16384,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,12288,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,10240,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,10240,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,8192,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,10240,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,7168,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,8192,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,7168,0.039423998445272446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,7168,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,65536,0.3054080009460449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,65536,0.2754240036010742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,65536,0.15609599649906158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,6144,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,6144,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,6144,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,5120,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,8192,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,5120,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,5120,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,4096,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,4096,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,3584,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,3584,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,3584,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,3072,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,3072,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,2560,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,3072,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,2560,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,2048,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,2048,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,2048,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,1536,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,1024,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,768,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,12288,0.05737600103020668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,12288,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,16384,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,16384,0.0756480023264885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,16384,0.04460800066590309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,12288,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,10240,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,10240,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,10240,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,8192,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,65536,0.27724799513816833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,8192,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,8192,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,7168,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,7168,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,7168,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,65536,0.2950719892978668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,6144,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,6144,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,65536,0.15532800555229187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,5120,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,5120,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,5120,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,4096,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,4096,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,3584,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,3584,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,3584,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,3072,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,3072,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,3072,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,2560,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,2560,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,2048,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,2048,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,1536,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,1536,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,1536,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,1024,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,768,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,10240,6.043231964111328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,10240,5.879231929779053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,12288,7.217631816864014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,12288,7.018239974975586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,16384,9.560064315795898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,16384,9.291808128356934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,8192,4.7554240226745605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,8192,4.840511798858643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,12288,6.354112148284912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,7168,4.188767910003662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,7168,4.262303829193115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,6144,3.713376045227051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,8192,4.2936320304870605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,6144,3.6112959384918213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,16384,8.29849624633789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,10240,5.299647808074951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,5120,3.0372800827026367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,4096,2.523423910140991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,4096,2.472543954849243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,5120,3.135711908340454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,7168,3.799232006072998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,3584,2.1830079555511475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,6144,3.295488119125366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,4096,2.281791925430298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,3584,2.2537920475006104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,3072,1.963487982749939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,3072,1.9231359958648682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,5120,2.8127360343933105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,2560,1.6352640390396118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,2048,1.3853119611740112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,2048,1.3343360424041748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,3584,2.0327999591827393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,2560,1.6641600131988525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,1536,1.053887963294983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,1024,0.7876799702644348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,1024,0.7709760069847107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,1536,1.0904639959335327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,3072,1.7681280374526978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,768,0.6260799765586853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,2048,1.2769279479980469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,2560,1.5246399641036987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,512,0.4760960042476654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,768,0.6392639875411987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,512,0.4930880069732666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,256,0.25119999051094055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,256,0.24537600576877594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,128,0.2115519940853119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,1536,1.0140800476074219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,128,0.19843199849128723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,512,0.509119987487793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,64,0.20003199577331543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,64,0.19519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,1024,0.7596799731254578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,768,0.638975977897644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,256,0.38278400897979736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,128,0.2996799945831299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,32,0.20070399343967438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,32,0.20163199305534363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,64,0.2606079876422882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,32,0.26131200790405273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,12288,1.831455945968628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,12288,1.8670400381088257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,16384,2.4721601009368896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,16384,2.390399932861328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,10240,1.5258879661560059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,16384,2.110624074935913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,10240,1.5644479990005493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,12288,1.6035200357437134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,10240,1.339136004447937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,8192,1.2341439723968506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,8192,1.2552640438079834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,8192,1.1017919778823853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,7168,1.0822720527648926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,7168,1.0981119871139526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,6144,0.9368320107460022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,7168,0.9687359929084778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,6144,0.9700480103492737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,5120,0.8093119859695435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,5120,0.7910400032997131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,4096,0.6473600268363953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,6144,0.8405759930610657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,4096,0.6669759750366211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,4096,0.5804160237312317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,5120,0.7132160067558289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,3584,0.5904639959335327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,3584,0.5752320289611816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,3584,0.5205439925193787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,3072,0.511135995388031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,3072,0.4976319968700409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,2560,0.42502400279045105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,65536,11.994912147521973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,3072,0.4566720128059387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,2048,0.34908801317214966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,2560,0.4310399889945984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,2560,0.39209601283073425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,1536,0.27487999200820923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,2048,0.3606080114841461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,1536,0.28467199206352234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,2048,0.3280639946460724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,768,0.16547200083732605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,1024,0.20748800039291382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,1024,0.20175999402999878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,65536,11.292832374572754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,1536,0.26070401072502136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,1024,0.1976960003376007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,768,0.17129600048065186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,512,0.12806400656700134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,512,0.12495999783277512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,768,0.16441600024700165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,256,0.06790400296449661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,256,0.06860800087451935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,512,0.13340799510478973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,128,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,128,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,256,0.10198400169610977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,64,0.05347200110554695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,64,0.05145600065588951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,128,0.08128000050783157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,32,0.056384000927209854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,32,0.056703999638557434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,64,0.07161600142717361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,32,0.07228799909353256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,16384,1.840831995010376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,12288,1.3879679441452026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,12288,1.4048960208892822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,16384,1.8135360479354858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,16384,1.5845439434051514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,10240,1.1459840536117554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,10240,1.1684160232543945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,12288,1.2140159606933594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,8192,0.9259840250015259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,8192,0.9450880289077759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,7168,0.8366720080375671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,10240,1.013856053352356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,7168,0.8129919767379761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,8192,0.8276159763336182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,65536,8.801216125488281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,6144,0.7111039757728577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,7168,0.7280960083007812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,6144,0.7263360023498535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,5120,0.5951359868049622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,5120,0.6145600080490112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,6144,0.6376960277557373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,4096,0.4880959987640381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,3584,0.4342080056667328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,5120,0.5369600057601929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,4096,0.5018240213394165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,3584,0.44441598653793335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,4096,0.4407680034637451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,65536,7.993855953216553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,3072,0.3877440094947815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,3584,0.3951359987258911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,3072,0.3776000142097473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,65536,8.434080123901367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,2560,0.32182401418685913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,3072,0.3436479866504669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,2048,0.27430400252342224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,2560,0.3301120102405548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,2048,0.2685120105743408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,1536,0.2099200040102005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,1536,0.21619200706481934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,2560,0.2961600124835968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,1024,0.15382400155067444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,1024,0.1592639982700348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,768,0.12742400169372559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,2048,0.24767999351024628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,1536,0.19804799556732178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,1024,0.1499200016260147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,768,0.1303360015153885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,512,0.09375999867916107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,512,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,256,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,768,0.1255040019750595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,256,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,128,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,512,0.10294400155544281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,256,0.07827199995517731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,128,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,64,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,128,0.06335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,64,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,32,0.04566400125622749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,32,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,64,0.056384000927209854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,32,0.05558399856090546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,16384,1.5495359897613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,16384,1.5089919567108154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,12288,1.1500799655914307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,16384,1.3216639757156372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,12288,1.1811200380325317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,10240,0.953760027885437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,65536,7.047967910766602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,12288,1.0048960447311401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,10240,0.977728009223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,8192,0.7711359858512878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,8192,0.7889279723167419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,10240,0.8529599905014038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,7168,0.6795520186424255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,7168,0.7055040001869202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,8192,0.6854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,6144,0.6004160046577454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,6144,0.6069440245628357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,7168,0.6073920130729675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,5120,0.5039680004119873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,6144,0.5278400182723999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,4096,0.4166400134563446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,5120,0.5172799825668335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,4096,0.40934398770332336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,5120,0.4480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,65536,7.158815860748291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,3584,0.3742400109767914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,3584,0.3662079870700836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,3072,0.3129279911518097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,65536,7.004415988922119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,2560,0.27459201216697693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,4096,0.36719998717308044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,3072,0.325408011674881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,2560,0.2704319953918457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,3584,0.32793599367141724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,3072,0.2901439964771271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,2048,0.22812800109386444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,2048,0.22368000447750092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,1536,0.17776000499725342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,2048,0.20710399746894836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,1536,0.18118399381637573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,2560,0.24755200743675232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,1024,0.13036799430847168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,1024,0.13302400708198547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,1536,0.16764800250530243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,768,0.10819199681282043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,768,0.11084800213575363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,512,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,1024,0.12627199292182922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,512,0.08236800134181976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,768,0.10598400235176086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,256,0.04787199944257736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,256,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,512,0.08620800077915192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,128,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,128,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,256,0.06639999896287918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,128,0.05350400134921074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,64,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,64,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,32,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,32,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,64,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,32,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,65536,4.871679782867432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,12288,0.9210240244865417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,65536,5.496799945831299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,16384,1.2311680316925049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,16384,1.2083200216293335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,12288,0.9371520280838013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,10240,0.7641599774360657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,16384,1.0667519569396973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,10240,0.7816640138626099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,65536,5.183135986328125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,12288,0.8046079874038696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,7168,0.5615040063858032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,8192,0.6200640201568604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,8192,0.6394879817962646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,10240,0.6789119839668274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,7168,0.5507519841194153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,6144,0.47276800870895386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,5120,0.40937599539756775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,8192,0.5496000051498413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,6144,0.48476800322532654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,5120,0.3988800048828125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,7168,0.4872319996356964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,6144,0.4232639968395233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,4096,0.32742398977279663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,4096,0.33980798721313477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,5120,0.36239999532699585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,3072,0.25299200415611267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,3584,0.2919360101222992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,3584,0.3022719919681549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,4096,0.2943679988384247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,3072,0.2634879946708679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,2560,0.2163199931383133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,2560,0.2247679978609085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,3584,0.26284798979759216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,3072,0.2327360063791275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,2048,0.18435199558734894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,2048,0.18009600043296814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,2560,0.19871999323368073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,1536,0.14601600170135498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,1024,0.10592000186443329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,2048,0.16662399470806122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,1024,0.10831999778747559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,1536,0.13497599959373474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,768,0.0910400003194809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,768,0.0867839977145195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,1024,0.10175999999046326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,512,0.06924799829721451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,512,0.07161600142717361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,768,0.08591999858617783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,1536,0.1422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,256,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,512,0.0697920024394989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,256,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,128,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,256,0.05427199974656105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,128,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,64,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,64,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,128,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,32,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,32,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,64,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,32,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,12288,0.8080000281333923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,16384,1.0722559690475464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,16384,1.0560640096664429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,12288,0.8282880187034607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,16384,0.926751971244812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,10240,0.6707199811935425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,12288,0.7055040001869202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,10240,0.6860799789428711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,8192,0.5485439896583557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,65536,4.253439903259277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,10240,0.593887984752655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,8192,0.551904022693634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,7168,0.4838399887084961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,7168,0.4949760138988495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,8192,0.48185598850250244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,6144,0.4191040098667145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,5120,0.34969601035118103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,6144,0.4280959963798523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,7168,0.4278720021247864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,5120,0.3643519878387451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,6144,0.37110400199890137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,65536,5.011263847351074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,4096,0.289792001247406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,4096,0.29686400294303894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,5120,0.3155199885368347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,3584,0.2550399899482727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,65536,4.7869439125061035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,3584,0.2643199861049652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,3072,0.2245119959115982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,4096,0.25753599405288696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,3072,0.22761599719524384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,3584,0.2325119972229004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,2560,0.19203199446201324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,3072,0.20265600085258484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,2560,0.19475199282169342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,1536,0.12988799810409546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,2048,0.15750400722026825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,2560,0.17449599504470825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,2048,0.16281600296497345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,1536,0.12703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,2048,0.14748799800872803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,1024,0.09609600156545639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,1024,0.09350399672985077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,1536,0.11846400052309036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,768,0.0801599994301796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,768,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,1024,0.08963199704885483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,768,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,256,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,512,0.061216000467538834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,512,0.058687999844551086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,256,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,256,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,512,0.06147199869155884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,128,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,128,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,64,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,64,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,128,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,64,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,32,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,32,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,32,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,12288,0.696832001209259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,65536,3.6310079097747803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,16384,0.9266560077667236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,16384,0.9088960289955139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,65536,4.032959938049316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,10240,0.5788159966468811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,12288,0.7135040163993835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,16384,0.7953280210494995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,10240,0.5899839997291565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,8192,0.4818879961967468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,7168,0.4272319972515106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,12288,0.6048319935798645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,8192,0.4684160053730011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,10240,0.5099200010299683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,7168,0.41465601325035095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,6144,0.36556801199913025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,8192,0.4145919978618622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,5120,0.3017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,6144,0.36022400856018066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,5120,0.3110719919204712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,7168,0.36959999799728394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,6144,0.31887999176979065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,4096,0.24883200228214264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,4096,0.2533760070800781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,5120,0.27136000990867615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,4096,0.2226240038871765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,3072,0.1921280026435852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,3584,0.22271999716758728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,3072,0.19628800451755524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,3584,0.22707200050354004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,3584,0.19836799800395966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,2048,0.1372479945421219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,2560,0.16908800601959229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,2560,0.16470399498939514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,3072,0.17507199943065643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,2560,0.15158399939537048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,2048,0.13996799290180206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,1536,0.11046399921178818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,1536,0.1125440001487732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,2048,0.12591999769210815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,1536,0.10208000242710114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,1024,0.08160000294446945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,1024,0.0830719992518425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,768,0.06806399673223495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,1024,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,65536,4.288703918457031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,768,0.06982400268316269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,512,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,512,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,768,0.0658240020275116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,512,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,256,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,256,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,256,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,128,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,128,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,128,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,64,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,64,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,64,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,32,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,32,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,32,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,12288,0.5743680000305176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,65536,3.031872034072876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,12288,0.5939520001411438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,16384,0.7748159766197205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,16384,0.7533760070800781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,10240,0.48364800214767456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,16384,0.6642240285873413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,10240,0.4946880042552948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,65536,3.086400032043457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,8192,0.3956480026245117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,12288,0.5097600221633911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,10240,0.423552006483078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,8192,0.4026240110397339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,7168,0.34726399183273315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,6144,0.3070720136165619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,7168,0.3564800024032593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,8192,0.34617599844932556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,6144,0.2973119914531708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,5120,0.25996801257133484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,7168,0.3099839985370636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,5120,0.2565760016441345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,4096,0.20988799631595612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,6144,0.26713600754737854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,4096,0.21507200598716736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,3584,0.18780800700187683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,5120,0.22700800001621246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,4096,0.18783999979496002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,3584,0.19232000410556793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,3072,0.16140800714492798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,3072,0.16764800250530243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,2560,0.13939200341701508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,3584,0.16704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,3072,0.147039994597435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,2560,0.14313599467277527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,2048,0.11971200257539749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,2048,0.11564800143241882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,1536,0.09440000355243683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,2560,0.12617599964141846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,1536,0.0960640013217926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,65536,4.006368160247803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,2048,0.10662399977445602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,1024,0.07088000327348709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,1024,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,768,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,1536,0.08633600175380707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,1024,0.06598400324583054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,768,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,512,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,768,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,512,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,256,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,512,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,256,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,128,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,64,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,256,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,128,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,64,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,64,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,32,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,32,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,32,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,128,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,12288,0.46000000834465027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,16384,0.6218240261077881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,16384,0.6085119843482971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,65536,2.4656319618225098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,12288,0.47491198778152466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,65536,2.989567995071411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,16384,0.5317760109901428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,10240,0.38979199528694153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,10240,0.3949120044708252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,8192,0.3184320032596588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,12288,0.4047040045261383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,8192,0.32393598556518555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,10240,0.3418880105018616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,7168,0.2804799973964691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,7168,0.2871040105819702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,6144,0.2473279982805252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,8192,0.2773439884185791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,6144,0.24220800399780273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,7168,0.24633599817752838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,5120,0.20713600516319275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,5120,0.2125760018825531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,4096,0.16908800601959229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,6144,0.21302400529384613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,4096,0.1728000044822693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,5120,0.1826239973306656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,65536,2.4271039962768555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,3584,0.1520639955997467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,3584,0.15360000729560852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,3072,0.130048006772995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,4096,0.15014399588108063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,3072,0.133760005235672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,3584,0.13353599607944489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,2048,0.09436800330877304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,2560,0.11244799941778183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,2560,0.11555200070142746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,3072,0.1191679984331131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,2560,0.10255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,2048,0.09647999703884125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,2048,0.08553600311279297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,1536,0.0764160007238388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,1536,0.07791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,1024,0.059039998799562454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,1024,0.057151999324560165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,1536,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,1024,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,768,0.05225599929690361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,768,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,512,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,768,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,512,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,256,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,512,0.037408001720905304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,256,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,128,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,256,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,128,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,128,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,64,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,64,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,32,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,64,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,32,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,32,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,65536,2.3941760063171387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,12288,0.41100800037384033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,16384,0.5301439762115479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,16384,0.5453119874000549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,16384,0.5278080105781555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,12288,0.42447999119758606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,10240,0.3444159924983978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,10240,0.3524479866027832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,12288,0.4052160084247589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,8192,0.2828480005264282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,8192,0.29104000329971313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,7168,0.24969600141048431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,10240,0.3387199938297272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,8192,0.27718400955200195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,7168,0.2561280131340027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,6144,0.21987199783325195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,6144,0.22137600183486938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,7168,0.24579200148582458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,5120,0.19430400431156158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,5120,0.18905599415302277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,4096,0.1555519998073578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,6144,0.2146880030632019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,65536,2.4662399291992188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,65536,2.098720073699951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,3584,0.1409280002117157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,4096,0.15967999398708344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,4096,0.1502079963684082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,3584,0.14444799721240997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,3072,0.125791996717453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,3072,0.12671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,2560,0.10982400178909302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,3584,0.1340479999780655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,3072,0.11872000247240067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,2560,0.1128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,2048,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,2560,0.10252799838781357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,5120,0.18115200102329254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,2048,0.0944959968328476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,1536,0.07715199887752533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,2048,0.08617600053548813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,1536,0.07788799703121185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,1024,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,1536,0.06960000097751617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,1024,0.058720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,1024,0.05363199859857559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,768,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,768,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,512,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,512,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,768,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,512,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,256,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,256,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,256,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,128,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,128,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,128,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,64,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,64,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,64,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,32,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,32,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,32,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,65536,2.1296000480651855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,16384,0.4720959961414337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,12288,0.35712000727653503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,12288,0.3526720106601715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,10240,0.2948479950428009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,16384,0.46006399393081665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,65536,2.3308799266815186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,16384,0.40137600898742676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,12288,0.3080959916114807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,10240,0.30003198981285095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,8192,0.2380480021238327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,7168,0.2115519940853119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,10240,0.2584640085697174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,8192,0.24358400702476501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,7168,0.21833600103855133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,8192,0.21011200547218323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,6144,0.1836480051279068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,7168,0.18780800700187683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,6144,0.18854400515556335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,6144,0.16288000345230103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,5120,0.16128000617027283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,5120,0.15545600652694702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,4096,0.1318719983100891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,5120,0.13948799669742584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,4096,0.12748800218105316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,3584,0.11523199826478958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,3584,0.11798399686813354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,4096,0.11459200084209442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,3584,0.10204800218343735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,3072,0.1021760031580925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,3072,0.1042879968881607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,65536,1.7987200021743774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,2560,0.08755200356245041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,2048,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,3072,0.09068799763917923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,2560,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,2048,0.07286400347948074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,2560,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,1536,0.059007998555898666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,2048,0.06579200178384781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,1024,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,1024,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,1536,0.0615679994225502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,1536,0.05455999821424484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,768,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,1024,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,512,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,512,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,768,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,768,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,512,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,256,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,256,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,128,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,128,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,256,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,128,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,64,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,64,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,64,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,32,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,32,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,32,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,65536,1.5463999509811401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,16384,0.4158720076084137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,12288,0.300927996635437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,12288,0.30671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,16384,0.39769598841667175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,10240,0.2536639869213104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,12288,0.3054400086402893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,16384,0.3978239893913269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,10240,0.2640640139579773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,8192,0.20899200439453125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,8192,0.21564799547195435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,10240,0.25571200251579285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,7168,0.18668800592422485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,8192,0.2099200040102005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,7168,0.19065600633621216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,7168,0.1860799938440323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,6144,0.16275200247764587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,6144,0.16652800142765045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,5120,0.14153599739074707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,65536,1.500480055809021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,6144,0.1626880019903183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,65536,1.4784640073776245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,5120,0.14399999380111694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,4096,0.12137600034475327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,4096,0.11801599711179733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,5120,0.1375039964914322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,3584,0.10915199667215347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,3072,0.09561599791049957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,4096,0.11353600025177002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,3584,0.10979200154542923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,3072,0.09753599762916565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,3584,0.10246399790048599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,3072,0.08975999802350998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,2560,0.08659200370311737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,2560,0.0854400023818016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,2048,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,2560,0.077504001557827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,2048,0.0737600028514862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,1536,0.06204799935221672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,2048,0.06604799628257751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,1536,0.05734400078654289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,1024,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,1024,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,1536,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,768,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,1024,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,768,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,512,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,768,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,512,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,512,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,256,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,65536,1.811519980430603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,256,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,128,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,128,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,256,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,128,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,64,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,64,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,32,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,64,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,32,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,32,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,12288,0.24240000545978546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,16384,0.3160319924354553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,12288,0.23747199773788452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,16384,0.3094399869441986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,12288,0.20681600272655487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,10240,0.19948799908161163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,16384,0.26867198944091797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,10240,0.20416000485420227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,65536,1.2934080362319946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,65536,1.1943360567092896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,8192,0.16300800442695618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,7168,0.14707200229167938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,10240,0.17260800302028656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,8192,0.16575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,7168,0.14348800480365753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,8192,0.14134399592876434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,7168,0.12566399574279785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,6144,0.12559999525547028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,6144,0.12857599556446075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,5120,0.10976000130176544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,5120,0.10857599973678589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,6144,0.10918399691581726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,4096,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,5120,0.09404800087213516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,4096,0.09113600105047226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,4096,0.07868800312280655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,3584,0.07968000322580338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,3584,0.08156800270080566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,3584,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,3072,0.07222399860620499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,3072,0.07014399766921997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,3072,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,2560,0.06204799935221672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,2560,0.06332799792289734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,2048,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,2048,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,2560,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,2048,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,1536,0.043776001781225204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,1536,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,1024,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,1024,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,768,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,768,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,1024,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,512,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,768,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,512,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,256,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,256,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,512,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,1536,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,256,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,128,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,65536,1.0334080457687378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,128,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,64,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,64,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,128,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,64,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,32,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,32,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,32,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,16384,0.23875199258327484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,16384,0.2436479926109314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,12288,0.18963199853897095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,12288,0.18716800212860107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,10240,0.15891200304031372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,16384,0.26848000288009644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,12288,0.20537599921226501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,10240,0.16307200491428375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,10240,0.17340800166130066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,8192,0.1313920021057129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,7168,0.11807999759912491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,8192,0.13468800485134125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,8192,0.14159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,7168,0.12057600170373917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,7168,0.12531200051307678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,6144,0.10521599650382996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,6144,0.107744000852108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,5120,0.09200000017881393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,6144,0.1106560006737709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,5120,0.09328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,65536,0.8781120181083679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,4096,0.07862400263547897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,65536,0.9699519872665405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,5120,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,4096,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,3584,0.07209599763154984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,4096,0.07734400033950806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,3584,0.07347200065851212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,3072,0.06428799778223038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,3584,0.06880000233650208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,3072,0.06496000289916992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,2560,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,3072,0.060256000608205795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,2560,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,2560,0.05196800082921982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,2048,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,2048,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,2048,0.044319998472929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,65536,1.0413119792938232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,1536,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,1536,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,1536,0.03545600175857544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,1024,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,1024,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,1024,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,768,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,768,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,512,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,768,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,512,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,256,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,512,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,256,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,128,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,256,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,64,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,128,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,64,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,32,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,32,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,12288,0.12118399888277054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,12288,0.12467200309038162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,16384,0.15910400450229645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,16384,0.1624000072479248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,16384,0.13711999356746674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,12288,0.10547199845314026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,8192,0.08524800091981888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,10240,0.1064319983124733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,10240,0.08950400352478027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,10240,0.1034879982471466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,8192,0.08713600039482117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,7168,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,8192,0.07331199944019318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,7168,0.0772479996085167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,6144,0.06694400310516357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,65536,0.603488028049469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,65536,0.6736000180244446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,7168,0.06601600348949432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,6144,0.10764800012111664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,5120,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,6144,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,5120,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,4096,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,4096,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,5120,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,4096,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,3584,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,65536,0.5200639963150024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,3584,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,3072,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,3072,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,3584,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,3072,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,2560,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,2560,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,2048,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,2560,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,2048,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,2048,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,1536,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,1536,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,1536,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,1024,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,1024,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,768,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,768,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,1024,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,512,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,768,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,256,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,512,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,512,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,256,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,256,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,128,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,128,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,128,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,64,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,32,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,12288,0.11903999745845795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,16384,0.15503999590873718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,12288,0.1210239976644516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,16384,0.15091200172901154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,16384,0.13699199259281158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,10240,0.10320000350475311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,12288,0.10595200210809708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,10240,0.10419200360774994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,8192,0.08550400286912918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,10240,0.08915200084447861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,8192,0.0862400010228157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,7168,0.07715199887752533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,8192,0.07366400212049484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,6144,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,7168,0.07772800326347351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,6144,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,7168,0.06572800129652023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,65536,0.5572159886360168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,65536,0.5676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,6144,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,5120,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,4096,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,5120,0.059647999703884125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,4096,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,3584,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,5120,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,4096,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,65536,0.5199360251426697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,3584,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,3072,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,3584,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,2560,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,3072,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,3072,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,2560,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,2048,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,2560,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,2048,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,1536,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,2048,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,1536,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,1536,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,1024,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,1024,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,1024,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,768,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,768,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,512,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,768,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,512,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,256,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,32,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,64,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,32,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,12288,0.06880000233650208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,16384,0.08841600269079208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,16384,0.09200000017881393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,16384,0.09772799909114838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,12288,0.07168000191450119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,12288,0.0759039968252182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,10240,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,10240,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,8192,0.050624001771211624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,65536,0.3138880133628845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,65536,0.31670400500297546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,10240,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,8192,0.05084799975156784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,8192,0.05299200117588043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,7168,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,7168,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,6144,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,7168,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,6144,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,6144,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,4096,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,5120,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,4096,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,5120,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,5120,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,4096,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,65536,0.36694398522377014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,3584,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,3584,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,3584,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,3072,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,3072,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,2560,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,2560,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,2048,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,2560,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,2048,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,1536,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,2048,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,1024,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,1024,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,1536,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,1024,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,512,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,32,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,12288,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,12288,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,16384,0.06195199862122536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,16384,0.06255999952554703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,16384,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,12288,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,10240,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,10240,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,10240,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,8192,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,8192,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,8192,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,7168,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,65536,0.19580799341201782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,65536,0.19177600741386414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,7168,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,6144,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,6144,0.03209599852561951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,65536,0.20393599569797516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,4096,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,6144,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,5120,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,5120,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,4096,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,3584,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,3072,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,3584,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,3072,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,2560,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,4096,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,2560,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,2048,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,2048,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,1536,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,1536,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,1024,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,12288,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,12288,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,16384,0.05491200089454651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,16384,0.05558399856090546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,16384,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,12288,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,10240,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,8192,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,10240,0.04073600098490715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,10240,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,65536,0.15756799280643463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,8192,0.058720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,8192,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,7168,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,7168,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,7168,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,65536,0.1607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,6144,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,6144,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,6144,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,5120,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,65536,0.12249600142240524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,4096,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,5120,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,4096,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,5120,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,3584,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,2560,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,3584,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,3072,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,3072,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,2048,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,2048,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,1536,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,1536,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,768,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,32,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,12288,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,12288,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,16384,0.05478399991989136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,16384,0.05737600103020668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,16384,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,12288,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,10240,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,10240,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,10240,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,8192,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,8192,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,8192,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,7168,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,7168,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,7168,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,65536,0.16470399498939514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,6144,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,6144,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,65536,0.12198399752378464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,6144,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,5120,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,5120,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,5120,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,65536,0.15798400342464447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,4096,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,4096,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,4096,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,3584,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,3072,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,2560,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,2048,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,1536,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,1024,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,768,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,12288,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,12288,0.04726399853825569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,16384,0.05423999950289726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,16384,0.05801599845290184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,16384,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,12288,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,10240,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,8192,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,10240,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,10240,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,8192,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,7168,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,8192,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,7168,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,7168,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,65536,0.15836800634860992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,65536,0.15663999319076538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,6144,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,65536,0.11820799857378006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,6144,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,6144,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,5120,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,5120,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,4096,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,3584,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,4096,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,3584,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,3072,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,3072,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,2560,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,10240,3.013535976409912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,10240,2.9742400646209717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,12288,3.588479995727539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,12288,3.521087884902954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,16384,4.747551918029785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,16384,4.66374397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,12288,3.204576015472412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,8192,2.445120096206665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,8192,2.409087896347046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,6144,1.86844801902771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,7168,2.104991912841797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,7168,2.1586880683898926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,16384,4.184192180633545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,8192,2.180959939956665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,10240,2.6949760913848877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,6144,1.815392017364502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,4096,1.273568034172058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,5120,1.5361599922180176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,5120,1.571679949760437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,4096,1.245408058166504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,7168,1.9348160028457642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,6144,1.6889920234680176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,3584,1.1022080183029175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,3584,1.1401920318603516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,4096,1.16048002243042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,5120,1.4230400323867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,3072,0.9888319969177246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,3072,0.9619839787483215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,2560,0.8230400085449219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,3584,1.0354880094528198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,2560,0.8528640270233154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,3072,0.9111999869346619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,2048,0.6792960166931152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,2048,0.7019199728965759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,1536,0.538752019405365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,1536,0.5505279898643494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,1024,0.3993920087814331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,2048,0.6456320285797119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,1024,0.40227198600769043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,2560,0.7752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,768,0.32368001341819763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,768,0.3286080062389374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,256,0.12876799702644348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,1536,0.5202879905700684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,1024,0.38710400462150574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,512,0.22044800221920013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,512,0.23740799725055695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,256,0.12800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,128,0.10425599664449692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,128,0.10380800068378448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,512,0.25836798548698425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,64,0.10246399790048599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,768,0.3253119885921478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,64,0.10188800096511841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,256,0.19593599438667297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,128,0.1555200070142746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,32,0.10480000078678131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,32,0.10627199709415436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,64,0.13548800349235535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,32,0.13648000359535217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,16384,1.2295680046081543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,12288,0.9194560050964355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,16384,1.204543948173523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,10240,0.7662400007247925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,10240,0.7885439991950989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,12288,0.9377599954605103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,12288,0.8059200048446655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,8192,0.6315199732780457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,16384,1.0579520463943481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,8192,0.6170240044593811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,7168,0.5439680218696594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,7168,0.5814399719238281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,10240,0.6793599724769592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,8192,0.550495982170105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,6144,0.5039359927177429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,6144,0.4696959853172302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,7168,0.4867520034313202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,5120,0.4023680090904236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,4096,0.3323200047016144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,6144,0.4236159920692444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,5120,0.412416011095047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,4096,0.32892799377441406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,3584,0.2900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,5120,0.35894399881362915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,3584,0.29846400022506714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,3072,0.25459200143814087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,4096,0.29494398832321167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,3584,0.26310399174690247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,3072,0.25891199707984924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,2560,0.2202879935503006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,2560,0.2152000069618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,2048,0.17667199671268463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,2048,0.18272000551223755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,3072,0.23132799565792084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,1536,0.1457280069589615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,65536,4.860511779785156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,2048,0.16755199432373047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,2560,0.1990399956703186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,1536,0.1422719955444336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,1024,0.10627199709415436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,768,0.08256000280380249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,768,0.08364800363779068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,1024,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,1536,0.13542400300502777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,1024,0.10275200009346008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,65536,4.7618560791015625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,768,0.08560000360012054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,512,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,512,0.0607680007815361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,256,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,256,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,512,0.07091200351715088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,128,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,128,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,64,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,64,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,256,0.05488000065088272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,128,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,64,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,32,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,32,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,32,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,16384,0.9253439903259277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,12288,0.7036799788475037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,12288,0.692575991153717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,16384,0.9072319865226746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,16384,0.7960000038146973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,10240,0.5802559852600098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,12288,0.6046720147132874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,10240,0.5947520136833191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,8192,0.46672001481056213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,8192,0.4765760004520416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,65536,4.112607955932617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,7168,0.420991986989975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,10240,0.5097919702529907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,8192,0.4142720103263855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,6144,0.3654400110244751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,7168,0.41283199191093445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,6144,0.36073601245880127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,7168,0.3669759929180145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,5120,0.3127039968967438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,5120,0.304639995098114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,6144,0.32025599479675293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,4096,0.24876800179481506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,5120,0.27084800601005554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,4096,0.2526400089263916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,3584,0.22124800086021423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,65536,3.631808042526245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,4096,0.22303999960422516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,3072,0.19280000030994415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,3584,0.22412799298763275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,3072,0.19660800695419312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,3584,0.19881600141525269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,65536,3.5523838996887207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,2560,0.1648000031709671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,2560,0.1700800061225891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,3072,0.17612800002098083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,2048,0.13625599443912506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,2560,0.15123200416564941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,2048,0.13900800049304962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,1536,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,1536,0.11184000223875046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,1024,0.08924800157546997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,2048,0.12668800354003906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,1024,0.08153600245714188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,1536,0.10396800190210342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,768,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,768,0.06441599875688553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,1024,0.07791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,512,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,768,0.06678400188684464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,256,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,512,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,256,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,128,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,128,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,256,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,64,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,128,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,64,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,32,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,32,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,64,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,512,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,32,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,12288,0.5777599811553955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,65536,3.1152639389038086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,16384,0.7743039727210999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,12288,0.5906239748001099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,16384,0.7566080093383789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,65536,3.08624005317688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,16384,0.66348797082901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,8192,0.3909760117530823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,10240,0.4979200065135956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,10240,0.4838719964027405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,8192,0.40243199467658997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,7168,0.35440000891685486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,10240,0.4256640076637268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,12288,0.5052800178527832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,6144,0.30508801341056824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,7168,0.3495039939880371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,6144,0.2999039888381958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,5120,0.26044800877571106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,8192,0.34598401188850403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,7168,0.3062399923801422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,5120,0.2549760043621063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,6144,0.2670400142669678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,4096,0.20812800526618958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,4096,0.21324799954891205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,4096,0.1860480010509491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,3584,0.18982400000095367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,5120,0.2285120040178299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,3072,0.16236799955368042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,3584,0.18425600230693817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,3584,0.16793599724769592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,3072,0.16441600024700165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,2560,0.13833600282669067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,2560,0.14207999408245087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,2560,0.12716799974441528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,2048,0.1157120019197464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,3072,0.1467839926481247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,2048,0.11801599711179733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,2048,0.10707200318574905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,1536,0.09344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,1536,0.09561599791049957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,1024,0.06947200000286102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,1024,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,1536,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,768,0.05532800033688545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,768,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,65536,2.957279920578003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,768,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,1024,0.06723199784755707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,512,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,512,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,256,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,128,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,512,0.046911999583244324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,256,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,128,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,256,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,128,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,64,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,64,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,32,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,64,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,32,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,32,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,12288,0.46185600757598877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,16384,0.6199039816856384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,16384,0.6080639958381653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,10240,0.644320011138916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,12288,0.47443199157714844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,65536,2.411263942718506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,65536,2.3862080574035645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,16384,0.5314880013465881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,12288,0.4063679873943329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,8192,0.3146879971027374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,10240,0.3964479863643646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,65536,2.572864055633545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,7168,0.2847679853439331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,7168,0.27884799242019653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,8192,0.32444798946380615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,10240,0.3431360125541687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,6144,0.24831999838352203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,5120,0.2072959989309311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,8192,0.2768000066280365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,7168,0.24723200500011444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,6144,0.21401600539684296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,5120,0.20908799767494202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,6144,0.251583993434906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,5120,0.18268799781799316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,4096,0.17126399278640747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,4096,0.16873599588871002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,3584,0.14956800639629364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,3584,0.1523520052433014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,4096,0.15001599490642548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,3072,0.13052800297737122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,3072,0.13344000279903412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,2560,0.11209599673748016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,2560,0.11535999923944473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,2048,0.09302400052547455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,3584,0.13468800485134125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,3072,0.11913599818944931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,2048,0.09520000219345093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,2560,0.10310400277376175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,2048,0.08617600053548813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,1536,0.07692799717187881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,1536,0.07558400183916092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,1024,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,1024,0.05484800040721893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,1536,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,1024,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,768,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,512,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,768,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,512,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,768,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,256,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,512,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,256,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,128,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,256,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,128,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,128,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,64,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,64,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,32,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,64,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,32,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,32,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,12288,0.40537598729133606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,16384,0.5364159941673279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,12288,0.4126720130443573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,16384,0.5303040146827698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,65536,2.082751989364624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,65536,2.0226240158081055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,10240,0.3421120047569275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,16384,0.5360959768295288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,12288,0.4050559997558594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,65536,2.0591681003570557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,10240,0.34966400265693665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,7168,0.2512640058994293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,8192,0.2852480113506317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,8192,0.28092798590660095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,6144,0.21644799411296844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,10240,0.34169599413871765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,7168,0.2569600045681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,6144,0.22172799706459045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,8192,0.27929601073265076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,7168,0.2465279996395111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,5120,0.19011199474334717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,5120,0.1924159973859787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,6144,0.21459199488162994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,4096,0.15465599298477173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,4096,0.1599999964237213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,5120,0.18223999440670013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,3072,0.1244800016283989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,3072,0.12732799351215363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,3584,0.14268800616264343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,3584,0.13964800536632538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,4096,0.14985600113868713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,3584,0.13382400572299957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,3072,0.11788800358772278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,2048,0.09136000275611877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,2560,0.10835199803113937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,2560,0.10332799702882767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,2048,0.08505599945783615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,2560,0.10246399790048599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,2048,0.0859839990735054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,1024,0.049056001007556915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,1536,0.06883200258016586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,1536,0.07657600194215775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,1536,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,1024,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,1024,0.054016001522541046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,768,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,768,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,512,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,512,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,768,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,512,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,256,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,256,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,256,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,128,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,128,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,64,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,128,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,64,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,32,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,64,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,32,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,16384,0.46748799085617065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,16384,0.45718398690223694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,32,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,12288,0.35631999373435974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,12288,0.3473599851131439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,10240,0.29971200227737427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,10240,0.2925119996070862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,65536,2.058687925338745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,65536,1.7780799865722656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,16384,0.3983680009841919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,8192,0.23718400299549103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,12288,0.3052160143852234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,7168,0.2099519968032837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,8192,0.24620799720287323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,7168,0.21526400744915009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,10240,0.2582400143146515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,6144,0.1831360012292862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,8192,0.20950399339199066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,6144,0.18851199746131897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,7168,0.18611200153827667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,5120,0.1590079963207245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,5120,0.15590399503707886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,4096,0.13087999820709229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,4096,0.12780800461769104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,6144,0.16198399662971497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,5120,0.139615997672081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,3584,0.11468800157308578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,3584,0.11673600226640701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,3072,0.09980800002813339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,4096,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,3072,0.10320000350475311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,3584,0.10268799960613251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,2560,0.09878399968147278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,2560,0.08662399649620056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,3072,0.09055999666452408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,2560,0.07849600166082382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,2048,0.07283200323581696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,65536,1.8098880052566528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,2048,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,1536,0.05897599831223488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,2048,0.06604799628257751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,1536,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,1024,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,1024,0.04368000105023384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,1536,0.05455999821424484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,768,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,768,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,512,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,1024,0.042688000947237015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,512,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,768,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,256,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,512,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,256,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,128,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,128,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,256,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,128,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,64,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,64,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,64,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,32,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,32,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,32,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,12288,0.2956160008907318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,16384,0.3911040127277374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,16384,0.38700801134109497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,65536,1.5584959983825684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,12288,0.3017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,10240,0.25833600759506226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,10240,0.25257599353790283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,16384,0.40108799934387207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,65536,1.7771199941635132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,12288,0.30486398935317993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,10240,0.25804799795150757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,8192,0.20735999941825867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,7168,0.185248002409935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,8192,0.2104319930076599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,7168,0.18863999843597412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,8192,0.20982399582862854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,6144,0.1608320027589798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,6144,0.16291199624538422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,7168,0.18636800348758698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,5120,0.1409599930047989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,6144,0.1624000072479248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,5120,0.1430719941854477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,4096,0.11721599847078323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,5120,0.13897599279880524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,4096,0.1199679970741272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,4096,0.11379200220108032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,3584,0.10780800133943558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,3072,0.09481599926948547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,3584,0.10646399855613708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,65536,1.5573760271072388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,3584,0.10233599692583084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,3072,0.08806400001049042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,2560,0.0851840004324913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,3072,0.0899519994854927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,2048,0.07075200229883194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,2048,0.06332799792289734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,2560,0.07791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,1536,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,2048,0.06547199934720993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,1536,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,1024,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,1536,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,1024,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,768,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,1024,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,768,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,512,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,768,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,2560,0.08416000008583069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,512,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,256,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,256,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,512,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,128,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,128,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,128,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,256,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,64,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,64,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,64,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,32,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,32,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,32,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,65536,1.539423942565918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,16384,0.30793601274490356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,12288,0.23708799481391907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,12288,0.24403199553489685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,16384,0.3170880079269409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,16384,0.2685120105743408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,10240,0.2019840031862259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,65536,1.1862720251083374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,10240,0.19964799284934998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,12288,0.20550400018692017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,8192,0.16153599321842194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,10240,0.17510400712490082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,8192,0.1640319973230362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,7168,0.1441279947757721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,7168,0.14764800667762756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,8192,0.1412159949541092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,6144,0.12588800489902496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,6144,0.12745599448680878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,5120,0.10659199953079224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,7168,0.12694400548934937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,5120,0.10870400071144104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,4096,0.0899519994854927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,6144,0.1098880022764206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,4096,0.0875839963555336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,5120,0.09417600184679031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,4096,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,3584,0.08064000308513641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,65536,1.2120959758758545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,3584,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,3072,0.07043199986219406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,3072,0.07222399860620499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,3584,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,2560,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,2560,0.06169600039720535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,3072,0.06268800050020218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,2560,0.05407999828457832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,2048,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,2048,0.052799999713897705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,2048,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,1536,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,1536,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,1024,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,1536,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,1024,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,1024,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,768,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,768,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,512,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,768,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,512,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,256,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,256,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,512,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,128,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,65536,1.0333119630813599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,128,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,256,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,128,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,32,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,64,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,64,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,64,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,32,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,32,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,12288,0.23824000358581543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,16384,0.3099519908428192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,16384,0.31436800956726074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,16384,0.2685120105743408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,10240,0.200095996260643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,65536,1.18777596950531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,12288,0.24246400594711304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,12288,0.20588800311088562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,10240,0.2027519941329956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,8192,0.1624000072479248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,8192,0.16486400365829468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,10240,0.17350399494171143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,7168,0.1443839967250824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,8192,0.14134399592876434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,7168,0.14643199741840363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,6144,0.12521600723266602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,7168,0.1268479973077774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,6144,0.12908799946308136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,5120,0.10867200046777725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,5120,0.10700800269842148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,6144,0.10982400178909302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,65536,1.221951961517334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,4096,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,5120,0.09385599941015244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,4096,0.0902400016784668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,3584,0.08060800284147263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,3584,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,4096,0.07731200009584427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,3072,0.07056000083684921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,3584,0.07017599791288376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,3072,0.07251200079917908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,2560,0.0613120011985302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,2560,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,3072,0.06204799935221672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,2048,0.051552001386880875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,2560,0.054496001452207565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,2048,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,2048,0.04700800031423569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,1536,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,1536,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,1024,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,1024,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,1536,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,1024,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,768,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,512,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,768,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,65536,1.0319039821624756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,768,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,512,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,256,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,512,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,256,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,128,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,128,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,256,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,128,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,64,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,64,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,32,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,64,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,32,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,32,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,12288,0.18537600338459015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,12288,0.18848000466823578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,16384,0.23849600553512573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,16384,0.2982400059700012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,10240,0.1621440052986145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,10240,0.15855999290943146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,12288,0.20505599677562714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,16384,0.2687999904155731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,10240,0.1738239973783493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,8192,0.13177600502967834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,8192,0.13302400708198547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,7168,0.11955200135707855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,8192,0.14185599982738495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,7168,0.11721599847078323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,6144,0.09974399954080582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,6144,0.10665600001811981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,7168,0.125791996717453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,5120,0.08595199882984161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,6144,0.10892800241708755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,5120,0.08806400001049042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,4096,0.07820799946784973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,5120,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,3584,0.07110399752855301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,65536,0.9062719941139221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,65536,0.8857600092887878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,4096,0.07148800045251846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,4096,0.07603199779987335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,3584,0.07273600250482559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,3072,0.05548800155520439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,3072,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,2560,0.058720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,3584,0.07030399888753891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,2560,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,3072,0.06102399900555611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,2560,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,2048,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,1536,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,2048,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,2048,0.044319998472929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,1536,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,1536,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,768,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,1024,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,65536,1.025439977645874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,768,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,1024,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,768,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,512,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,256,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,512,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,256,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,512,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,256,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,128,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,128,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,64,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,64,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,128,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,32,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,32,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,1024,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,12288,0.16076800227165222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,16384,0.20694400370121002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,12288,0.15798400342464447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,16384,0.20441600680351257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,16384,0.227743998169899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,12288,0.17507199943065643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,10240,0.13657599687576294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,10240,0.13840000331401825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,8192,0.11241599917411804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,10240,0.14908799529075623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,7168,0.10259199887514114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,8192,0.11526399850845337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,8192,0.11923199892044067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,7168,0.1035199984908104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,6144,0.09190399944782257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,6144,0.09046400338411331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,7168,0.10729599744081497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,5120,0.07974400371313095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,5120,0.0809599980711937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,6144,0.09299200028181076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,65536,0.7530879974365234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,5120,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,4096,0.06723199784755707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,4096,0.06678400188684464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,65536,0.7384639978408813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,3584,0.06070400029420853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,3584,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,4096,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,3072,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,3584,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,3072,0.05843200162053108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,3072,0.05283199995756149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,2560,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,2560,0.049247998744249344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,2048,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,2560,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,2048,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,2048,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,1536,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,65536,0.9110400080680847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,1536,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,1536,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,1024,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,1024,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,768,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,1024,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,512,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,512,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,768,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,256,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,512,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,768,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,256,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,256,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,128,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,128,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,64,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,64,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,128,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,32,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,32,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,16384,0.15936000645160675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,12288,0.12246400117874146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,12288,0.12540799379348755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,16384,0.1605439931154251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,16384,0.13782399892807007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,10240,0.10467199981212616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,12288,0.1056319996714592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,10240,0.10659199953079224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,65536,0.6154559850692749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,8192,0.08604799956083298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,10240,0.0894400030374527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,65536,0.5990080237388611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,8192,0.08723200112581253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,7168,0.07571200281381607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,7168,0.07891199737787247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,6144,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,8192,0.07382400333881378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,6144,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,7168,0.0655359998345375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,5120,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,5120,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,5120,0.05939200147986412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,4096,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,4096,0.04848000034689903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,3584,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,4096,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,3072,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,3584,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,3072,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,3584,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,6144,0.0692799985408783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,3072,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,2560,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,65536,0.5190719962120056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,2560,0.03545600175857544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,2560,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,2048,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,2048,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,2048,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,1536,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,1536,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,1024,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,1024,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,768,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,1024,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,768,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,768,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,512,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,256,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,512,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,256,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,128,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,32,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,64,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,32,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,12288,0.12099199742078781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,16384,0.15136000514030457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,12288,0.11807999759912491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,16384,0.15667200088500977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,16384,0.13814400136470795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,10240,0.10156799852848053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,10240,0.10438399761915207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,12288,0.10476800054311752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,8192,0.08963199704885483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,8192,0.08723200112581253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,65536,0.5489599704742432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,10240,0.08979199826717377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,7168,0.07929600030183792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,8192,0.07334399968385696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,6144,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,6144,0.06870400160551071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,7168,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,7168,0.0793600007891655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,6144,0.057151999324560165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,5120,0.05846399813890457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,65536,0.5648000240325928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,4096,0.052639998495578766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,5120,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,4096,0.049247998744249344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,4096,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,5120,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,3584,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,3584,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,3072,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,3584,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,65536,0.5150399804115295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,2560,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,3072,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,3072,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,2560,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,2560,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,2048,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,2048,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,1536,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,2048,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,1536,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,1536,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,1024,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,1024,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,768,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,768,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,1024,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,512,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,512,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,768,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,512,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,256,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,256,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,64,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,32,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,12288,0.07619199901819229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,16384,0.10566399991512299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,16384,0.10102400183677673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,12288,0.0700799971818924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,16384,0.09747199714183807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,12288,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,10240,0.06297600269317627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,8192,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,10240,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,8192,0.050624001771211624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,10240,0.06454399973154068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,8192,0.053568001836538315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,7168,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,7168,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,65536,0.32102400064468384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,65536,0.3134079873561859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,6144,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,7168,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,6144,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,6144,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,4096,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,5120,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,65536,0.3635520040988922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,5120,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,5120,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,4096,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,4096,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,3584,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,3584,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,3072,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,3072,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,2560,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,3072,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,2560,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,3584,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,2048,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,2560,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,1536,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,2048,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,1024,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,1536,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,2048,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,1024,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,1024,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,768,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,256,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,32,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,12288,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,12288,0.06518399715423584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,16384,0.0772159993648529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,16384,0.07823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,16384,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,12288,0.06054399907588959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,10240,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,10240,0.059039998799562454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,8192,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,8192,0.05040000006556511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,10240,0.052352000027894974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,7168,0.04755200073122978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,8192,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,7168,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,6144,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,65536,0.2423039972782135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,65536,0.24035200476646423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,7168,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,6144,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,5120,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,6144,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,5120,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,4096,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,5120,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,65536,0.3081279993057251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,4096,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,4096,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,3584,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,3072,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,3584,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,3072,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,2560,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,3072,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,2560,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,2048,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,2560,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,1536,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,1536,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,1536,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,2048,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,2048,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,1024,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,1024,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,768,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,3584,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,768,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,1024,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,12288,0.05056000128388405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,12288,0.050335999578237534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,16384,0.061983998864889145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,16384,0.061792001128196716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,16384,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,12288,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,10240,0.05827200040221214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,10240,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,10240,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,8192,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,65536,0.20416000485420227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,8192,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,7168,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,8192,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,7168,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,6144,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,7168,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,6144,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,65536,0.17791999876499176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,5120,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,65536,0.20124800503253937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,5120,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,4096,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,4096,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,3584,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,3584,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,3072,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,3072,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,2560,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,2560,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,2048,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,1536,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,2048,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,1536,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,1024,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,1024,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,32,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,12288,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,12288,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,16384,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,16384,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,12288,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,10240,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,10240,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,10240,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,8192,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,8192,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,7168,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,16384,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,8192,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,65536,0.114656001329422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,65536,0.11267200112342834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,7168,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,6144,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,7168,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,6144,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,5120,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,5120,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,6144,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,4096,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,5120,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,4096,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,65536,0.11846400052309036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,3584,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,3584,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,2560,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,2560,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,768,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,32,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,12288,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,12288,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,16384,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,16384,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,16384,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,12288,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,10240,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,10240,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,10240,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,8192,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,8192,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,65536,0.09804800152778625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,65536,0.097120001912117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,7168,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,8192,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,7168,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,6144,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,7168,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,5120,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,6144,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,4096,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,5120,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,4096,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,3584,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,65536,0.11807999759912491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,3584,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,3072,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,2560,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,2048,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,1024,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,1536,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,16384,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,12288,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,16384,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,12288,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,16384,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,12288,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,10240,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,10240,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,10240,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,8192,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,8192,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,65536,0.09404800087213516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,7168,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,65536,0.09657599776983261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,8192,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,7168,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,7168,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,6144,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,6144,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,5120,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,4096,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,65536,0.11769600212574005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,5120,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,4096,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,3072,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,2560,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,1536,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,2048,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,1536,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,768,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,12288,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,12288,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,16384,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,16384,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,16384,0.05878400057554245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,12288,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,10240,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,10240,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,8192,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,10240,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,8192,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,7168,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,7168,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,65536,0.09417600184679031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,7168,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,6144,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,65536,0.09705600142478943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,65536,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,6144,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,5120,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,4096,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,5120,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,8192,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,4096,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,3584,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,3072,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,2048,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,2560,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,1024,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,768,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,64,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,10240,1.5611519813537598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,12288,1.862720012664795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,10240,1.5216959714889526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,12288,1.831488013267517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,8192,1.2553280591964722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,8192,1.239583969116211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,16384,2.4705920219421387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,16384,2.4128639698028564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,12288,1.605023980140686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,7168,1.1043200492858887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,7168,1.0822399854660034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,16384,2.111776113510132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,6144,0.9683520197868347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,6144,0.9396799802780151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,8192,1.0867520570755005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,10240,1.3612480163574219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,7168,0.9719359874725342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,5120,0.8091199994087219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,5120,0.7925440073013306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,4096,0.6581760048866272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,4096,0.6423680186271667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,3584,0.5691199898719788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,6144,0.8444160223007202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,3072,0.5067520141601562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,4096,0.589631974697113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,5120,0.715391993522644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,3584,0.5820159912109375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,3072,0.5020800232887268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,2560,0.43513599038124084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,2560,0.41951999068260193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,3584,0.5234240293502808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,2048,0.35388800501823425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,2048,0.35865598917007446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,3072,0.4601919949054718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,1536,0.2765760123729706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,1024,0.187391996383667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,1024,0.1910720020532608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,2560,0.3983039855957031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,1536,0.2848320007324219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,768,0.15308800339698792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,2048,0.32921600341796875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,768,0.14428800344467163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,1536,0.26767998933792114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,512,0.10873600095510483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,1024,0.2033279985189438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,512,0.1074879989027977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,256,0.07158400118350983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,768,0.1709119975566864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,256,0.06905599683523178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,64,0.05558399856090546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,128,0.05612799897789955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,128,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,64,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,512,0.13817599415779114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,256,0.10681600123643875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,64,0.07305599749088287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,32,0.05913599953055382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,32,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,32,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,128,0.0833280012011528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,16384,0.6505600214004517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,12288,0.4610239863395691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,10240,0.38998401165008545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,12288,0.4785600006580353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,65536,2.4111359119415283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,16384,0.6084799766540527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,10240,0.3973439931869507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,16384,0.5320320129394531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,12288,0.40726399421691895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,7168,0.2844479978084564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,8192,0.3206399977207184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,8192,0.3173440098762512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,8192,0.2775680124759674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,10240,0.34198400378227234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,7168,0.2763200104236603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,5120,0.2078399956226349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,6144,0.2555199861526489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,6144,0.2503040134906769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,7168,0.2470719963312149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,5120,0.20422400534152985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,6144,0.21430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,4096,0.17148800194263458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,4096,0.16672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,5120,0.18252800405025482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,3584,0.15408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,3584,0.15136000514030457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,4096,0.15142400562763214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,3072,0.13017599284648895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,3072,0.13283200562000275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,2560,0.1136000007390976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,2560,0.11401599645614624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,3584,0.1351040005683899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,3072,0.12025599926710129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,2048,0.09372799843549728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,1536,0.07286400347948074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,1536,0.07327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,2048,0.09347199648618698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,2560,0.10307200253009796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,2048,0.08646400272846222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,1024,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,1024,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,1536,0.07116799801588058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,1024,0.055743999779224396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,768,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,768,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,512,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,512,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,256,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,768,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,65536,2.3696320056915283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,512,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,256,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,128,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,256,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,128,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,64,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,64,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,128,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,32,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,64,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,32,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,32,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,12288,0.3509120047092438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,16384,0.49404799938201904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,16384,0.4589439928531647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,12288,0.35555198788642883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,65536,2.076256036758423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,65536,1.8079359531402588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,16384,0.4023680090904236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,10240,0.2924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,10240,0.2996160089969635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,8192,0.2574720084667206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,8192,0.23683199286460876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,12288,0.30617600679397583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,10240,0.2603200078010559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,7168,0.2099519968032837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,7168,0.21401600539684296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,8192,0.20931200683116913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,6144,0.1841599941253662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,7168,0.18649600446224213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,6144,0.18892799317836761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,5120,0.15612800419330597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,5120,0.15964800119400024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,6144,0.16291199624538422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,5120,0.13916799426078796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,4096,0.13152000308036804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,3584,0.11615999788045883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,4096,0.1284479945898056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,3072,0.10070399940013885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,4096,0.11427199840545654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,3584,0.10275200009346008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,3072,0.10255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,3072,0.09167999774217606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,2560,0.08697599917650223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,2560,0.08806400001049042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,2048,0.07334399968385696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,2560,0.07868800312280655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,65536,1.7799359560012817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,1536,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,2048,0.07295999675989151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,3584,0.1143679991364479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,2048,0.066880002617836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,1024,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,1536,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,1024,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,768,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,1536,0.05443200096487999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,1024,0.04364800080657005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,768,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,512,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,512,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,768,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,256,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,256,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,512,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,256,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,128,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,128,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,64,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,64,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,128,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,64,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,32,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,32,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,32,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,65536,1.5488959550857544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,12288,0.29580798745155334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,16384,0.3928320109844208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,12288,0.3012480139732361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,16384,0.3837119936943054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,16384,0.40064001083374023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,65536,1.486624002456665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,10240,0.25148800015449524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,12288,0.30527999997138977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,10240,0.25571200251579285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,10240,0.25702399015426636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,8192,0.2104319930076599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,8192,0.20688000321388245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,7168,0.18438400328159332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,8192,0.2094080001115799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,7168,0.18943999707698822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,6144,0.16204799711704254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,6144,0.15878400206565857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,7168,0.18572799861431122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,5120,0.14047999680042267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,6144,0.16204799711704254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,5120,0.13939200341701508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,5120,0.13891200721263885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,4096,0.11599999666213989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,4096,0.11168000102043152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,3584,0.10739199817180634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,3584,0.10051199793815613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,4096,0.114656001329422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,3072,0.08643200248479843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,3072,0.08591999858617783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,3584,0.10259199887514114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,2560,0.07526399940252304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,2560,0.07548800110816956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,65536,1.481152057647705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,3072,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,2560,0.07756800204515457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,2048,0.06191999837756157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,2048,0.0644799992442131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,1536,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,1536,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,1024,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,2048,0.06735999882221222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,1024,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,1536,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,1024,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,768,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,768,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,512,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,512,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,768,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,256,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,512,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,256,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,256,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,128,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,128,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,128,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,64,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,64,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,64,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,32,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,32,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,32,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,65536,1.5456639528274536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,16384,0.30988800525665283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,12288,0.23999999463558197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,16384,0.31190401315689087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,10240,0.20073600113391876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,16384,0.2677440047264099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,12288,0.2070399969816208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,12288,0.24886399507522583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,10240,0.20387199521064758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,8192,0.16592000424861908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,8192,0.1600639969110489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,7168,0.14697599411010742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,7168,0.1430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,10240,0.17427200078964233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,8192,0.14188799262046814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,7168,0.125791996717453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,6144,0.12559999525547028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,6144,0.1265919953584671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,5120,0.10729599744081497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,6144,0.11030399799346924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,5120,0.10851199924945831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,4096,0.08982399851083755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,4096,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,65536,1.2081279754638672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,5120,0.0944959968328476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,3584,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,65536,1.1887999773025513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,4096,0.0783040001988411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,3584,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,3584,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,3072,0.07116799801588058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,3072,0.07043199986219406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,2560,0.061503998935222626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,2048,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,2560,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,3072,0.061983998864889145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,2560,0.05407999828457832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,2048,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,2048,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,1536,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,1536,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,1024,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,1024,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,1024,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,1536,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,768,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,768,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,512,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,768,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,65536,1.032863974571228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,512,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,256,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,256,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,512,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,256,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,128,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,128,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,64,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,128,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,64,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,32,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,64,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,32,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,32,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,12288,0.2353920042514801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,12288,0.24182400107383728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,16384,0.31644800305366516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,16384,0.30979201197624207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,10240,0.20425599813461304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,10240,0.20041599869728088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,16384,0.26899200677871704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,12288,0.20534400641918182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,8192,0.16121600568294525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,10240,0.17392000555992126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,65536,1.1965440511703491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,7168,0.14739200472831726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,8192,0.16652800142765045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,7168,0.14287999272346497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,6144,0.12876799702644348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,8192,0.14166399836540222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,7168,0.12563200294971466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,6144,0.12668800354003906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,5120,0.10691200196743011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,6144,0.11084800213575363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,5120,0.11023999750614166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,4096,0.08982399851083755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,4096,0.088639996945858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,5120,0.09516800194978714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,3584,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,4096,0.07798399776220322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,3584,0.07942400127649307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,65536,1.209920048713684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,3072,0.07155200093984604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,3072,0.06985600292682648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,3584,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,2560,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,3072,0.06220800057053566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,2560,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,2048,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,2048,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,2560,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,2048,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,1536,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,1536,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,1024,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,1024,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,768,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,768,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,1024,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,768,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,512,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,512,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,256,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,512,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,256,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,128,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,256,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,128,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,65536,1.032256007194519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,64,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,128,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,64,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,64,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,32,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,32,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,1536,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,32,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,16384,0.29795199632644653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,12288,0.18854400515556335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,12288,0.18665599822998047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,16384,0.24009600281715393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,10240,0.16044799983501434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,16384,0.26899200677871704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,10240,0.16089600324630737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,12288,0.20496000349521637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,8192,0.1297599971294403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,65536,0.8850880265235901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,8192,0.13382400572299957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,10240,0.17452800273895264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,7168,0.11619199812412262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,6144,0.09836799651384354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,7168,0.11590400338172913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,6144,0.10815999656915665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,8192,0.14022399485111237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,7168,0.12639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,5120,0.08508799970149994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,6144,0.11033599823713303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,5120,0.08560000360012054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,4096,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,4096,0.07791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,5120,0.09324800223112106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,65536,0.956991970539093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,3584,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,3072,0.06374400109052658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,4096,0.07673600316047668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,3584,0.06406400352716446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,3072,0.06566400080919266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,3584,0.06969600170850754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,2560,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,2560,0.04864000156521797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,3072,0.0607680007815361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,2048,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,2048,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,2560,0.05321599915623665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,1536,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,2048,0.044415999203920364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,1024,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,1536,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,1536,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,768,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,1024,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,768,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,1024,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,512,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,512,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,768,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,256,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,512,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,256,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,65536,1.0322879552841187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,256,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,128,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,128,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,64,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,64,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,32,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,32,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,12288,0.15689599514007568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,12288,0.19299200177192688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,16384,0.20553599298000336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,16384,0.20336000621318817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,16384,0.23161600530147552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,12288,0.1746239960193634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,65536,0.7588480114936829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,8192,0.11260800063610077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,10240,0.13840000331401825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,10240,0.14828799664974213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,8192,0.11529599875211716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,7168,0.10156799852848053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,7168,0.11987199634313583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,10240,0.13657599687576294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,8192,0.12003199756145477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,6144,0.09014400094747543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,5120,0.08457600325345993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,7168,0.10790400207042694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,6144,0.0912960022687912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,5120,0.09126400202512741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,6144,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,4096,0.06963200122117996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,5120,0.07996799796819687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,4096,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,65536,0.7384960055351257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,3584,0.063680000603199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,3584,0.06124800071120262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,4096,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,3072,0.05462399870157242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,3584,0.059647999703884125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,3072,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,3072,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,2560,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,2560,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,2560,0.047200001776218414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,2048,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,2048,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,1536,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,1536,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,2048,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,65536,0.950111985206604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,1024,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,1024,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,768,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,768,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,1536,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,1024,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,768,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,512,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,512,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,512,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,256,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,128,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,256,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,256,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,128,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,64,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,128,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,64,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,32,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,32,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,12288,0.12220799922943115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,16384,0.17190399765968323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,12288,0.1244800016283989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,16384,0.158720001578331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,16384,0.13654400408267975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,12288,0.10662399977445602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,10240,0.1043199971318245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,10240,0.11088000237941742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,8192,0.08409599959850311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,8192,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,10240,0.0899839997291565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,7168,0.07599999755620956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,8192,0.07385600358247757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,7168,0.0801599994301796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,7168,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,6144,0.06745599955320358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,6144,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,5120,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,6144,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,5120,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,65536,0.6106879711151123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,4096,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,5120,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,4096,0.04835199937224388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,65536,0.8329600095748901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,4096,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,3584,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,3072,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,3584,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,3584,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,3072,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,2560,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,3072,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,2560,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,65536,0.5191680192947388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,2048,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,2048,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,2560,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,2048,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,1536,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,1536,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,1024,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,1024,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,768,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,1024,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,768,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,512,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,768,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,512,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,256,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,128,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,64,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,32,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,32,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,32,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,16384,0.15859200060367584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,16384,0.16176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,12288,0.12140800058841705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,12288,0.12495999783277512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,16384,0.13792000710964203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,10240,0.1042879968881607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,12288,0.10489600151777267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,10240,0.10649599879980087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,8192,0.08515200018882751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,8192,0.08691199868917465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,10240,0.09004800021648407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,7168,0.07763200253248215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,8192,0.07283200323581696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,7168,0.0783040001988411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,7168,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,6144,0.06694400310516357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,5120,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,6144,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,65536,0.6106240153312683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,6144,0.057920001447200775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,65536,0.5988799929618835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,4096,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,5120,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,5120,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,4096,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,3584,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,4096,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,3584,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,65536,0.5237759947776794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,3584,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,3072,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,3072,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,2560,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,3072,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,2560,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,2560,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,2048,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,2048,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,2048,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,1536,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,1536,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,1024,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,1536,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,768,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,1024,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,1024,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,768,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,768,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,512,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,512,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,256,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,256,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,256,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,64,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,128,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,64,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,32,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,12288,0.1218239963054657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,12288,0.12204799801111221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,16384,0.15503999590873718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,16384,0.15087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,10240,0.10134399682283401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,12288,0.10630399733781815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,16384,0.1369280070066452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,10240,0.1048320010304451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,8192,0.084927998483181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,10240,0.09008000046014786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,8192,0.08716800063848495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,7168,0.07823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,65536,0.5990399718284607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,8192,0.07308799773454666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,65536,0.6156799793243408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,6144,0.06739199906587601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,7168,0.07763200253248215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,6144,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,5120,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,7168,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,6144,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,4096,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,4096,0.0490880012512207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,5120,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,4096,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,3584,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,3584,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,3584,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,3072,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,3072,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,2560,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,65536,0.5216320157051086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,2560,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,3072,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,2048,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,2560,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,5120,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,1536,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,2048,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,2048,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,1536,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,1024,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,1536,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,1024,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,1024,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,768,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,768,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,512,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,768,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,512,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,512,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,256,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,256,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,128,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,64,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,32,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,32,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,12288,0.12086399644613266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,16384,0.1584639996290207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,12288,0.10380800068378448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,16384,0.13279999792575836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,16384,0.13199999928474426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,12288,0.0981760025024414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,10240,0.10412800312042236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,10240,0.08940800279378891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,8192,0.08441600203514099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,10240,0.08361600339412689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,8192,0.07462400197982788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,7168,0.06639999896287918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,8192,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,7168,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,6144,0.06617599725723267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,7168,0.06191999837756157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,6144,0.06015999987721443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,6144,0.054207999259233475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,65536,0.47574400901794434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,65536,0.465472012758255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,5120,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,5120,0.05302400141954422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,4096,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,5120,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,4096,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,65536,0.5186880230903625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,3584,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,3584,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,4096,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,3072,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,3072,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,3584,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,3072,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,2560,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,2560,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,2560,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,2048,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,2048,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,1536,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,2048,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,1536,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,1536,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,1024,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,1024,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,1024,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,768,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,768,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,512,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,768,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,512,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,512,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,256,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,128,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,64,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,12288,0.08156800270080566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,12288,0.06940799951553345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,16384,0.08921600133180618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,16384,0.0899519994854927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,16384,0.09782399982213974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,12288,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,10240,0.05859199911355972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,10240,0.07049600034952164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,8192,0.05804799869656563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,10240,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,65536,0.31881600618362427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,8192,0.05023999884724617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,7168,0.043487999588251114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,8192,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,7168,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,6144,0.04950400069355965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,6144,0.05532800033688545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,7168,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,6144,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,5120,0.04364800080657005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,5120,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,4096,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,4096,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,65536,0.31273600459098816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,5120,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,4096,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,3584,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,65536,0.36665600538253784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,3584,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,3072,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,3584,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,3072,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,3072,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,2560,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,2560,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,2048,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,2048,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,2560,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,2048,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,1536,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,1536,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,1024,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,1024,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,768,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,768,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,768,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,512,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,512,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,64,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,32,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,12288,0.06454399973154068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,12288,0.06511999666690826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,16384,0.07807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,16384,0.07923199981451035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,16384,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,12288,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,10240,0.057151999324560165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,8192,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,10240,0.05926400050520897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,65536,0.3861120045185089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,10240,0.05238400027155876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,65536,0.24537600576877594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,8192,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,8192,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,7168,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,7168,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,7168,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,6144,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,6144,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,6144,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,5120,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,4096,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,5120,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,5120,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,4096,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,4096,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,3584,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,65536,0.31302401423454285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,3584,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,3072,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,3584,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,3072,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,2560,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,3072,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,2560,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,2560,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,2048,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,2048,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,1536,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,1536,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,1536,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,1024,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,1024,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,768,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,1024,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,2048,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,64,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,12288,0.05132799968123436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,12288,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,16384,0.06259199976921082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,16384,0.06304000318050385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,16384,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,12288,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,10240,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,10240,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,10240,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,8192,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,8192,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,7168,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,8192,0.03209599852561951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,7168,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,65536,0.18166400492191315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,6144,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,65536,0.18227200210094452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,6144,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,6144,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,65536,0.20278400182724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,5120,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,5120,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,4096,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,5120,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,4096,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,3584,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,3072,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,3584,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,3072,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,2560,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,2560,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,2560,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,2048,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,1536,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,1536,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,2048,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,768,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,32,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,12288,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,16384,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,16384,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,16384,0.05929600074887276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,10240,0.05081599950790405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,12288,0.04447999969124794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,10240,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,65536,0.14486399292945862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,10240,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,8192,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,65536,0.14351999759674072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,8192,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,8192,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,12288,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,7168,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,7168,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,6144,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,6144,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,65536,0.20339199900627136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,5120,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,4096,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,5120,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,5120,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,4096,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,3072,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,3072,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,3584,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,3584,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,2560,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,2560,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,2048,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,1536,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,2048,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,1536,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,2048,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,1536,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,1024,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,12288,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,16384,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,16384,0.0432640016078949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,12288,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,12288,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,10240,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,10240,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,10240,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,8192,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,65536,0.11299200356006622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,8192,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,7168,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,8192,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,7168,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,65536,0.11235199868679047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,7168,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,6144,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,6144,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,5120,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,6144,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,5120,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,65536,0.11823999881744385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,4096,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,5120,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,3584,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,3072,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,3072,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,2560,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,3072,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,2560,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,1536,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,1536,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,1024,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,12288,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,12288,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,16384,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,16384,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,16384,0.056703999638557434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,12288,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,10240,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,10240,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,8192,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,10240,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,8192,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,8192,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,7168,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,7168,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,65536,0.08003199845552444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,65536,0.07603199779987335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,7168,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,6144,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,6144,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,6144,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,5120,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,5120,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,65536,0.11833599954843521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,3072,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,4096,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,3584,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,3072,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,2560,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,2048,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,1536,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,1024,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,12288,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,12288,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,16384,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,16384,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,16384,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,12288,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,10240,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,10240,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,10240,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,8192,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,8192,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,65536,0.06380800157785416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,7168,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,8192,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,65536,0.06364800035953522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,7168,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,6144,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,5120,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,65536,0.11673600226640701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,4096,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,3584,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,3072,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,2560,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,6144,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,1024,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,64,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,12288,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,16384,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,12288,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,16384,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,16384,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,12288,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,10240,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,10240,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,10240,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,8192,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,8192,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,65536,0.06345599889755249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,7168,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,8192,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,65536,0.0639680027961731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,7168,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,6144,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,7168,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,6144,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,5120,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,5120,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,65536,0.11801599711179733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,5120,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,4096,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,4096,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,3072,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,3584,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,3072,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,2560,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,2048,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,1536,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,64,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,32,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,12288,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,12288,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,16384,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,16384,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,16384,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,12288,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,10240,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,10240,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,10240,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,8192,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,65536,0.06332799792289734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,8192,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,7168,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,8192,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,7168,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,6144,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,6144,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,7168,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,5120,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,4096,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,65536,0.11244799941778183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,5120,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,65536,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,4096,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,3584,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,3072,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,2560,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,2048,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,1536,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,768,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,512,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,64,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,64,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,32,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,10240,1.1749440431594849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,16384,1.8562239408493042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,12288,1.3697279691696167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,8192,0.9345279932022095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,12288,1.4071040153503418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,10240,1.1443519592285156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,8192,0.9554880261421204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,7168,0.8398720026016235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,7168,0.8223680257797241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,6144,0.7275519967079163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,6144,0.7045120000839233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,16384,1.807487964630127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,12288,1.2055679559707642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,10240,1.0147839784622192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,8192,0.8318079710006714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,4096,0.48767998814582825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,5120,0.5930560231208801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,7168,0.7239360213279724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,16384,1.5981440544128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,5120,0.6100800037384033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,4096,0.5038719773292542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,5120,0.535968005657196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,6144,0.633184015750885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,4096,0.4387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,3584,0.44047999382019043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,3072,0.37862399220466614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,2560,0.3263680040836334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,2560,0.32233598828315735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,3584,0.43139201402664185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,3072,0.38758400082588196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,2048,0.2672959864139557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,2048,0.2696000039577484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,1536,0.21452799439430237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,3584,0.3978559970855713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,1536,0.21065600216388702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,1024,0.365664005279541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,1024,0.34886398911476135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,3072,0.3472000062465668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,768,0.2789759933948517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,2560,0.2969279885292053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,2048,0.2502079904079437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,1024,0.16543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,1536,0.2056960016489029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,768,0.26236799359321594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,512,0.08799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,512,0.11151999980211258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,256,0.05907199904322624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,256,0.05859199911355972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,512,0.08899199962615967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,128,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,768,0.14003199338912964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,256,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,128,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,64,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,64,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,64,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,32,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,128,0.0650240033864975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,32,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,32,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,12288,0.34838399291038513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,12288,1.0989439487457275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,16384,0.4930559992790222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,10240,0.3022080063819885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,10240,0.29574400186538696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,16384,1.423359990119934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,8192,0.23996800184249878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,65536,1.8243520259857178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,12288,0.3078719973564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,16384,0.3983359932899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,65536,1.7688640356063843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,7168,0.21740800142288208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,10240,0.25884801149368286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,8192,0.24796800315380096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,7168,0.21315200626850128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,6144,0.1834239959716797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,8192,0.2112639993429184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,6144,0.16358399391174316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,5120,0.15641599893569946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,5120,0.1385599970817566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,5120,0.15967999398708344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,7168,0.18646399676799774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,6144,0.18697600066661835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,4096,0.12857599556446075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,3584,0.11878400295972824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,4096,0.13052800297737122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,3584,0.10412800312042236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,3584,0.1141119971871376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,4096,0.11478400230407715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,3072,0.1037760004401207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,3072,0.10208000242710114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,2560,0.08646400272846222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,2560,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,2048,0.07321599870920181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,3072,0.09136000275611877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,2560,0.07859200239181519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,2048,0.07363200187683105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,1536,0.13996799290180206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,1536,0.1366720050573349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,2048,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,1536,0.05548800155520439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,1024,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,1024,0.091839998960495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,768,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,768,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,512,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,1024,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,512,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,768,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,512,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,256,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,256,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,128,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,256,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,128,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,64,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,64,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,128,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,64,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,32,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,32,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,32,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,16384,1.0695680379867554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,16384,0.3495999872684479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,12288,0.2733759880065918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,65536,1.3307199478149414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,16384,0.40323200821876526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,12288,0.8120319843292236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,10240,0.2627519965171814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,10240,0.25920000672340393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,8192,0.18937599658966064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,8192,0.2120320051908493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,10240,0.23119999468326569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,12288,0.3038719892501831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,65536,1.5459519624710083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,7168,0.17107200622558594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,7168,0.16969600319862366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,8192,0.21055999398231506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,5120,0.12716799974441528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,6144,0.1483519971370697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,6144,0.14902399480342865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,7168,0.1870719939470291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,6144,0.1618880033493042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,4096,0.10716799646615982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,4096,0.11257600039243698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,5120,0.12940800189971924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,5120,0.1387840062379837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,3584,0.09839999675750732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,4096,0.114656001329422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,3584,0.09708800166845322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,3072,0.08806400001049042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,3072,0.08963199704885483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,3584,0.10284800082445145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,2560,0.07587199658155441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,2560,0.07779199630022049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,3072,0.09091199934482574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,65536,1.3116480112075806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,2048,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,2048,0.06617599725723267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,2560,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,1536,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,2048,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,1536,0.04912000149488449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,1024,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,1024,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,1024,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,1536,0.05484800040721893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,768,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,768,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,512,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,256,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,512,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,768,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,256,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,512,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,256,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,128,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,128,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,128,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,64,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,64,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,64,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,32,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,32,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,32,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,65536,1.5455360412597656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,12288,0.23635199666023254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,16384,0.31811198592185974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,16384,0.9295679926872253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,12288,0.24265600740909576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,10240,0.19990399479866028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,12288,0.20528000593185425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,16384,0.27142399549484253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,8192,0.16463999450206757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,10240,0.2030400037765503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,8192,0.16358399391174316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,10240,0.17363199591636658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,7168,0.14710399508476257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,8192,0.14313599467277527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,7168,0.14342400431632996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,6144,0.12777599692344666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,7168,0.12668800354003906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,6144,0.13180799782276154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,5120,0.27167999744415283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,6144,0.11190400272607803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,5120,0.11215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,4096,0.09120000153779984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,4096,0.1424960047006607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,5120,0.09504000097513199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,3584,0.08009599894285202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,3584,0.18751999735832214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,65536,1.2132799625396729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,4096,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,3072,0.07116799801588058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,3072,0.07056000083684921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,3584,0.07068800181150436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,2560,0.060256000608205795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,65536,1.1974719762802124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,3072,0.0626240000128746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,2560,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,2560,0.0628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,2048,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,2048,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,1536,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,2048,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,1536,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,1024,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,1536,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,1024,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,768,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,512,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,1024,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,768,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,512,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,512,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,256,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,256,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,128,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,256,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,128,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,128,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,64,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,64,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,32,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,64,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,65536,1.032863974571228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,32,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,768,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,12288,0.1889919936656952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,16384,0.2441280037164688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,16384,0.23958399891853333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,12288,0.18688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,10240,0.16204799711704254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,16384,0.27081599831581116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,10240,0.16035200655460358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,12288,0.20534400641918182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,10240,0.1722559928894043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,8192,0.1366720050573349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,8192,0.1329279989004135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,7168,0.12060800194740295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,6144,0.10496000200510025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,8192,0.14230400323867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,7168,0.12025599926710129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,6144,0.10710400342941284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,7168,0.127360001206398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,5120,0.09200000017881393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,6144,0.10992000252008438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,5120,0.09206400066614151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,4096,0.07788799703121185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,4096,0.07955200225114822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,4096,0.07785599678754807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,5120,0.09363199770450592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,3584,0.07180800288915634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,65536,0.9034240245819092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,3584,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,3072,0.06259199976921082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,3584,0.07036799937486649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,3072,0.06505600363016129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,3072,0.05663999915122986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,65536,0.8798080086708069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,2560,0.04800000041723251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,2560,0.04787199944257736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,2048,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,2048,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,1536,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,1536,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,2048,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,2560,0.054016001522541046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,1536,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,1024,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,1024,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,768,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,1024,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,768,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,512,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,768,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,512,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,256,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,512,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,128,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,256,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,65536,1.0409280061721802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,256,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,128,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,64,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,64,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,64,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,32,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,32,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,32,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,16384,0.21769599616527557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,16384,0.2688640058040619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,12288,0.17164799571037292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,12288,0.16771200299263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,16384,0.2149759978055954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,12288,0.20640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,10240,0.14236800372600555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,10240,0.14774399995803833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,10240,0.1745920032262802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,8192,0.1189119964838028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,8192,0.12108799815177917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,65536,1.0326080322265625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,8192,0.14140799641609192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,7168,0.10857599973678589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,6144,0.09398400038480759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,6144,0.09487999975681305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,7168,0.10755199939012527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,5120,0.08310399949550629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,6144,0.10969600081443787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,5120,0.08383999764919281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,4096,0.07103999704122543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,5120,0.09385599941015244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,4096,0.07187200337648392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,7168,0.12652799487113953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,3584,0.0658240020275116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,65536,0.7807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,3584,0.06415999680757523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,4096,0.07763200253248215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,3072,0.059967998415231705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,3584,0.07043199986219406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,65536,1.1142719984054565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,3072,0.06067200005054474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,2560,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,2560,0.09859199821949005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,2048,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,2560,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,2048,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,3072,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,2048,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,1536,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,1024,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,1024,0.043327998369932175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,1536,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,768,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,1536,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,1024,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,512,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,768,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,256,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,768,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,512,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,256,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,512,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,256,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,128,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,128,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,64,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,64,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,128,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,32,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,32,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,16384,0.2260800004005432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,16384,0.18572799861431122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,12288,0.14614400267601013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,12288,0.1480640023946762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,10240,0.15609599649906158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,16384,0.2712000012397766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,12288,0.2064639925956726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,10240,0.1263359934091568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,65536,2.0852160453796387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,10240,0.17295999825000763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,8192,0.270143985748291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,65536,0.6645119786262512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,65536,1.0321919918060303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,7168,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,8192,0.12876799702644348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,7168,0.23907199501991272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,8192,0.14239999651908875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,5120,0.07385600358247757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,6144,0.09996800124645233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,6144,0.10096000134944916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,7168,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,5120,0.08694399893283844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,6144,0.09312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,5120,0.07820799946784973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,4096,0.0700799971818924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,3584,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,3584,0.08140800148248672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,4096,0.07068800181150436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,4096,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,3072,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,3072,0.052480001002550125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,3584,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,3072,0.05212799832224846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,2560,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,2560,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,2048,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,2048,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,2560,0.046592000871896744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,1536,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,1536,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,2048,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,1024,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,1024,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,768,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,1536,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,768,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,1024,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,768,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,512,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,512,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,512,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,256,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,256,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,256,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,128,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,64,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,128,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,64,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,32,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,16384,0.20239999890327454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,16384,0.158720001578331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,12288,0.12345600128173828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,32,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,12288,0.1558080017566681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,16384,0.19145600497722626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,12288,0.14457599818706512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,65536,0.6128640174865723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,10240,0.1050880029797554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,10240,0.10691200196743011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,8192,0.08579199761152267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,65536,0.5985919833183289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,10240,0.12307199835777283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,8192,0.08720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,7168,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,7168,0.07868800312280655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,6144,0.06796800345182419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,6144,0.0682239979505539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,8192,0.09868799895048141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,7168,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,5120,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,6144,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,5120,0.06028800085186958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,4096,0.04851200059056282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,5120,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,4096,0.0490880012512207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,4096,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,3584,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,3584,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,3072,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,3584,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,2560,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,3072,0.0514880008995533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,3072,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,2560,0.05321599915623665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,2048,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,2560,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,2048,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,1536,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,1536,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,2048,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,1024,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,1024,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,1536,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,768,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,1024,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,768,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,512,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,512,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,768,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,65536,0.8774399757385254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,256,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,512,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,256,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,128,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,256,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,128,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,32,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,32,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,12288,0.12489599734544754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,12288,0.12230399996042252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,16384,0.1642560064792633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,16384,0.13571199774742126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,16384,0.1589760035276413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,12288,0.10556799918413162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,10240,0.1043199971318245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,10240,0.10710400342941284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,8192,0.0854720026254654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,10240,0.08844800293445587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,65536,1.39792001247406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,65536,0.597823977470398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,7168,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,8192,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,6144,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,7168,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,8192,0.07379200309515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,7168,0.06480000168085098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,6144,0.06764800101518631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,5120,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,4096,0.04848000034689903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,4096,0.0490880012512207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,5120,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,5120,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,6144,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,4096,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,3584,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,3584,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,3584,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,3072,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,3072,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,2560,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,2560,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,2560,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,65536,0.5232639908790588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,2048,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,2048,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,1536,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,2048,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,1536,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,1024,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,1024,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,1536,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,768,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,1024,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,768,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,3072,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,768,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,512,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,256,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,512,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,256,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,256,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,128,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,64,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,32,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,32,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,12288,0.12454400211572647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,12288,0.1215360015630722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,16384,0.1605439931154251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,16384,0.16099199652671814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,16384,0.13743999600410461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,12288,0.10595200210809708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,10240,0.10422399640083313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,10240,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,8192,0.08691199868917465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,8192,0.08521600067615509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,10240,0.09001599997282028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,65536,0.5979840159416199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,8192,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,7168,0.07574400305747986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,6144,0.06752000004053116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,7168,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,6144,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,5120,0.057023998349905014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,7168,0.06614399701356888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,65536,0.616927981376648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,6144,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,5120,0.0594559982419014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,4096,0.048608001321554184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,4096,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,5120,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,4096,0.04275200143456459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,3584,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,3584,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,3072,0.039423998445272446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,3072,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,3584,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,2560,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,2560,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,3072,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,65536,0.5229439735412598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,2560,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,2048,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,1536,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,2048,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,1536,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,1024,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,2048,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,1536,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,1024,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,1024,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,768,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,768,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,512,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,768,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,512,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,512,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,256,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,256,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,256,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,128,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,64,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,64,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,32,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,12288,0.09222400188446045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,16384,0.11868800222873688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,16384,0.11737599968910217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,16384,0.138047993183136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,10240,0.10307200253009796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,12288,0.10553599894046783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,12288,0.12559999525547028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,10240,0.106175996363163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,8192,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,65536,0.6151679754257202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,65536,0.41625601053237915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,8192,0.08473599702119827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,10240,0.09027200192213058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,8192,0.07395199686288834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,7168,0.11683200299739838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,7168,0.07817599922418594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,6144,0.06697600334882736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,6144,0.06787200272083282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,6144,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,7168,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,5120,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,5120,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,4096,0.04774399846792221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,5120,0.04950400069355965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,4096,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,3584,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,3584,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,4096,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,3584,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,3072,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,3072,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,2560,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,3072,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,2560,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,2560,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,65536,0.5204160213470459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,2048,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,2048,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,2048,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,1536,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,1536,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,1024,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,1536,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,1024,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,768,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,1024,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,768,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,768,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,512,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,512,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,512,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,128,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,256,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,128,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,64,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,32,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,32,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,12288,0.07999999821186066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,16384,0.09471999853849411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,12288,0.08089599758386612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,16384,0.10780800133943558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,16384,0.10364799946546555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,10240,0.06864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,8192,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,10240,0.06918399780988693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,12288,0.08246400207281113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,10240,0.06985600292682648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,8192,0.05737600103020668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,65536,0.38867199420928955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,7168,0.05164799839258194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,8192,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,7168,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,6144,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,7168,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,5120,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,6144,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,5120,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,65536,0.39081600308418274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,6144,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,5120,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,4096,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,4096,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,3584,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,3584,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,3584,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,4096,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,3072,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,65536,0.5192639827728271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,3072,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,3072,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,2560,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,2560,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,2048,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,2560,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,1536,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,1536,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,2048,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,2048,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,1024,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,1536,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,1024,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,768,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,512,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,256,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,1024,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,32,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,12288,0.06761600077152252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,16384,0.07807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,12288,0.06985600292682648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,16384,0.17769600450992584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,16384,0.09849599748849869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,12288,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,10240,0.059167999774217606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,10240,0.10230399668216705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,8192,0.052639998495578766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,10240,0.06400000303983688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,8192,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,65536,0.2441920042037964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,8192,0.05920000001788139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,7168,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,7168,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,7168,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,6144,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,6144,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,65536,0.2401600033044815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,6144,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,5120,0.04729599878191948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,5120,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,5120,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,4096,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,3584,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,4096,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,3584,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,4096,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,3584,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,65536,0.37990400195121765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,3072,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,3072,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,3072,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,2560,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,2560,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,2560,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,2048,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,2048,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,2048,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,1536,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,1536,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,1536,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,1024,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,1024,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,768,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,32,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,12288,0.05843200162053108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,16384,0.10147199779748917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,12288,0.05852799862623215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,16384,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,16384,0.07763200253248215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,10240,0.06755200028419495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,10240,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,12288,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,65536,0.19497600197792053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,8192,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,10240,0.05145600065588951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,7168,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,8192,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,7168,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,7168,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,65536,0.19062399864196777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,6144,0.039423998445272446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,6144,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,5120,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,5120,0.042847998440265656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,6144,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,5120,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,8192,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,4096,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,65536,0.2941119968891144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,4096,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,3584,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,4096,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,3584,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,3584,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,3072,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,3072,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,2560,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,2560,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,3072,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,2560,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,2048,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,2048,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,1536,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,2048,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,1536,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,1024,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,1536,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,1024,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,768,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,64,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,32,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,12288,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,12288,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,16384,0.09196799993515015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,16384,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,16384,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,12288,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,10240,0.07248000055551529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,10240,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,10240,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,8192,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,65536,0.1430719941854477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,7168,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,8192,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,7168,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,65536,0.1432960033416748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,6144,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,7168,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,6144,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,6144,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,5120,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,5120,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,65536,0.20457600057125092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,4096,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,5120,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,4096,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,3584,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,3584,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,3072,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,3072,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,2560,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,2560,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,2048,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,2048,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,1024,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,1024,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,768,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,12288,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,16384,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,16384,0.046271998435258865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,12288,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,16384,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,12288,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,10240,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,10240,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,8192,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,8192,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,7168,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,65536,0.12697599828243256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,65536,0.1260800063610077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,7168,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,6144,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,7168,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,5120,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,6144,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,5120,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,65536,0.20470400154590607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,5120,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,4096,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,4096,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,3584,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,3584,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,3072,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,3072,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,3072,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,2560,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,2560,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,2048,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,2048,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,1536,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,1024,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,12288,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,12288,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,16384,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,16384,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,16384,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,12288,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,10240,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,10240,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,8192,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,10240,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,65536,0.09286399930715561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,65536,0.1037760004401207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,7168,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,7168,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,8192,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,7168,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,6144,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,5120,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,6144,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,6144,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,5120,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,4096,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,65536,0.11798399686813354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,5120,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,4096,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,3584,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,3584,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,3584,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,3072,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,3072,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,2560,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,2560,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,2048,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,12288,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,16384,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,16384,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,16384,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,12288,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,10240,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,10240,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,8192,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,8192,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,10240,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,65536,0.07180800288915634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,8192,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,7168,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,7168,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,65536,0.06854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,7168,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,6144,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,6144,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,5120,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,5120,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,5120,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,4096,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,4096,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,65536,0.1170559972524643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,3584,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,3072,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,2560,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,2048,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,1536,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,1536,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,1024,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,64,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,12288,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,12288,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,16384,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,16384,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,12288,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,10240,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,10240,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,10240,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,8192,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,8192,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,8192,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,65536,0.05830400064587593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,65536,0.08259200304746628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,16384,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,6144,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,7168,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,7168,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,5120,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,6144,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,65536,0.11740799993276596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,5120,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,4096,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,3584,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,3072,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,2560,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,2048,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,1536,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,1024,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,12288,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,12288,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,16384,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,16384,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,16384,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,12288,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,10240,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,10240,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,10240,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,8192,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,65536,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,65536,0.056063998490571976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,7168,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,8192,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,65536,0.11776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,8192,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,7168,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,6144,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,6144,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,5120,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,5120,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,4096,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,3584,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,3072,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,3072,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,2560,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,1536,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,512,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,64,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,1536,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,12288,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,12288,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,16384,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,16384,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,16384,0.05283199995756149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,10240,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,12288,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,10240,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,8192,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,10240,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,7168,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,65536,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,65536,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,8192,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,8192,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,7168,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,6144,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,5120,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,5120,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,65536,0.11260800063610077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,4096,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,4096,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,3584,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,3072,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,2560,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,64,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,32,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,12288,0.9472320079803467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,10240,0.7723519802093506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,10240,0.7931519746780396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,8192,0.6712319850921631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,8192,0.6344959735870361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,12288,1.1127680540084839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,16384,1.2344319820404053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,12288,0.8125439882278442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,10240,0.6865280270576477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,16384,1.2174400091171265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,6144,0.48524799942970276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,16384,1.0506880283355713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,7168,0.5507839918136597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,7168,0.5622720122337341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,8192,0.5544639825820923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,6144,0.4747839868068695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,5120,0.4187520146369934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,7168,0.4922240078449249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,3584,0.28832000494003296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,6144,0.42816001176834106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,4096,0.3325760066509247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,3584,0.29423999786376953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,4096,0.3341439962387085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,5120,0.4015040099620819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,3072,0.25407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,5120,0.3644160032272339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,3072,0.26025599241256714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,4096,0.29872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,2560,0.22169600427150726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,2560,0.21615999937057495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,2048,0.17558400332927704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,1536,0.1366720050573349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,3584,0.2662400007247925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,2048,0.17881600558757782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,1536,0.1398400068283081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,3072,0.2346239984035492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,1024,0.09772799909114838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,2560,0.20323200523853302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,2048,0.17263999581336975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,1024,0.09724800288677216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,768,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,768,0.07865600287914276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,1536,0.13913600146770477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,512,0.059776000678539276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,512,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,768,0.09087999910116196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,1024,0.10815999656915665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,256,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,256,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,128,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,512,0.07568000257015228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,128,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,256,0.058880001306533813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,64,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,128,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,64,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,32,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,64,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,32,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,32,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,16384,0.32099199295043945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,16384,0.31305599212646484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,12288,0.2436479926109314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,12288,0.23625600337982178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,16384,0.2691519856452942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,12288,0.20556800067424774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,10240,0.20108799636363983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,10240,0.20390400290489197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,65536,1.3062080144882202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,10240,0.1746560037136078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,8192,0.16659200191497803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,7168,0.1459520012140274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,8192,0.16275200247764587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,7168,0.14800000190734863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,6144,0.1257600039243698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,6144,0.12863999605178833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,8192,0.14294399321079254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,5120,0.10841599851846695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,7168,0.1273919939994812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,5120,0.10947199910879135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,4096,0.0894400030374527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,6144,0.111455999314785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,5120,0.09504000097513199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,4096,0.09120000153779984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,3584,0.08048000186681747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,3584,0.08108799904584885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,4096,0.07849600166082382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,3072,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,3072,0.07158400118350983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,3584,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,2560,0.06102399900555611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,2560,0.06204799935221672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,3072,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,2048,0.05040000006556511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,2560,0.05526399984955788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,2048,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,1536,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,2048,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,1536,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,1024,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,65536,1.274783968925476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,1024,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,1536,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,1024,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,768,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,768,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,512,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,512,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,768,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,512,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,256,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,256,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,128,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,128,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,64,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,128,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,64,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,64,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,65536,1.0298559665679932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,32,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,32,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,32,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,256,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,12288,0.18563200533390045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,16384,0.2996799945831299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,12288,0.2271679937839508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,16384,0.23683199286460876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,10240,0.1927040070295334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,10240,0.16092799603939056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,16384,0.26950401067733765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,12288,0.2056639939546585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,8192,0.13699199259281158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,8192,0.1273919939994812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,7168,0.11244799941778183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,10240,0.17206400632858276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,7168,0.11750400066375732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,8192,0.14153599739074707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,6144,0.09967999905347824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,7168,0.12694400548934937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,6144,0.0982080027461052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,5120,0.08336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,5120,0.0841279998421669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,6144,0.11084800213575363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,4096,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,4096,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,5120,0.09443199634552002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,3584,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,3584,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,4096,0.07571200281381607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,65536,0.9120960235595703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,3072,0.05423999950289726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,3072,0.05580800026655197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,3584,0.07011199742555618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,2560,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,3072,0.06224000081419945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,2560,0.04864000156521797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,2048,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,65536,0.8804799914360046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,2560,0.052960000932216644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,2048,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,1536,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,2048,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,1536,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,1024,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,1024,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,1536,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,1024,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,768,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,768,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,512,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,512,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,512,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,768,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,256,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,256,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,65536,1.0405759811401367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,256,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,128,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,128,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,64,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,64,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,32,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,128,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,32,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,12288,0.15836800634860992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,12288,0.19487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,16384,0.24966399371623993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,16384,0.20336000621318817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,10240,0.13683199882507324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,10240,0.16499200463294983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,16384,0.2497279942035675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,12288,0.1765120029449463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,10240,0.14972800016403198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,8192,0.11427199840545654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,8192,0.1154559999704361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,65536,0.9438400268554688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,7168,0.10313600301742554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,65536,0.7431359887123108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,8192,0.12534399330615997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,6144,0.09062399715185165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,7168,0.10300800204277039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,6144,0.09296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,7168,0.10969600081443787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,6144,0.0931520015001297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,5120,0.09171199798583984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,5120,0.07996799796819687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,4096,0.0682239979505539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,5120,0.07923199981451035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,3584,0.061792001128196716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,3584,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,4096,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,3072,0.06111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,3072,0.05488000065088272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,3584,0.06038400158286095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,3072,0.05257600173354149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,2560,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,2560,0.04758400097489357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,4096,0.06867200136184692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,2048,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,2560,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,2048,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,1536,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,2048,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,1536,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,1536,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,1024,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,1024,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,1024,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,768,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,768,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,512,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,512,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,768,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,256,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,512,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,256,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,256,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,128,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,128,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,65536,0.9439679980278015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,128,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,64,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,32,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,32,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,64,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,12288,0.1218239963054657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,12288,0.12505599856376648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,16384,0.17283199727535248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,16384,0.1607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,16384,0.13948799669742584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,10240,0.10639999806880951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,10240,0.10278400033712387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,12288,0.1058880016207695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,10240,0.09008000046014786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,65536,0.6172800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,8192,0.08604799956083298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,8192,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,8192,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,7168,0.07744000107049942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,65536,0.6385599970817566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,7168,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,6144,0.06784000247716904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,5120,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,7168,0.06617599725723267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,6144,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,5120,0.06080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,6144,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,5120,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,4096,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,4096,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,3584,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,3072,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,4096,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,3072,0.04073600098490715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,3584,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,3584,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,2560,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,2560,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,3072,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,2048,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,2048,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,2048,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,2560,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,1536,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,1536,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,65536,0.5237439870834351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,1024,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,1536,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,768,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,1024,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,1024,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,768,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,512,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,512,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,256,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,768,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,256,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,512,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,128,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,32,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,32,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,64,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,12288,0.12169600278139114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,12288,0.12492799758911133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,16384,0.15942400693893433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,16384,0.1398400068283081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,10240,0.10719999670982361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,12288,0.10556799918413162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,10240,0.10473600029945374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,16384,0.16355200111865997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,8192,0.0851840004324913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,65536,0.620415985584259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,8192,0.08799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,10240,0.09011200070381165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,7168,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,8192,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,7168,0.081216000020504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,7168,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,6144,0.06860800087451935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,6144,0.06707199662923813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,5120,0.0613120011985302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,6144,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,5120,0.05923200026154518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,4096,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,4096,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,65536,0.593280017375946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,3584,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,5120,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,4096,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,3584,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,2560,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,3072,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,3072,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,2560,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,3072,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,3584,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,2560,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,65536,0.5237439870834351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,2048,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,1536,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,2048,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,1536,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,2048,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,1536,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,1024,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,768,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,1024,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,1024,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,768,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,512,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,512,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,768,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,512,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,256,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,256,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,128,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,64,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,32,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,32,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,32,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,12288,0.12188799679279327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,12288,0.1228799968957901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,16384,0.1515520066022873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,16384,0.157151997089386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,16384,0.13542400300502777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,10240,0.1130559965968132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,12288,0.10630399733781815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,10240,0.10489600151777267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,8192,0.0872960016131401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,8192,0.09071999788284302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,10240,0.0899519994854927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,7168,0.07795199751853943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,65536,0.5527039766311646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,8192,0.07407999783754349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,7168,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,6144,0.06886400282382965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,6144,0.06924799829721451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,7168,0.06684800237417221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,5120,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,5120,0.06006399914622307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,6144,0.05859199911355972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,5120,0.04992000013589859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,4096,0.04927999898791313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,4096,0.05084799975156784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,65536,0.570688009262085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,4096,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,3584,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,3584,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,3584,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,3072,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,3072,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,2560,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,3072,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,2560,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,65536,0.521120011806488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,2048,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,1536,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,2560,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,1536,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,2048,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,1536,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,1024,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,768,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,1024,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,768,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,1024,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,768,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,512,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,256,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,512,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,256,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,2048,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,32,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,128,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,32,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,32,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,16384,0.12691199779510498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,12288,0.10099200159311295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,16384,0.13443200290203094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,12288,0.10396800190210342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,16384,0.1308159977197647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,10240,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,12288,0.0974079966545105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,10240,0.08972799777984619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,8192,0.08569599688053131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,10240,0.0841279998421669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,65536,0.4773760139942169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,8192,0.07583999633789062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,8192,0.06844799965620041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,7168,0.06694400310516357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,7168,0.06796800345182419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,6144,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,7168,0.06224000081419945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,6144,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,5120,0.05241600051522255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,6144,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,5120,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,5120,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,4096,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,4096,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,4096,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,65536,0.4658240079879761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,3584,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,3584,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,3584,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,3072,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,3072,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,2560,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,65536,0.5236480236053467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,3072,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,2560,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,2560,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,2048,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,1536,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,1536,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,2048,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,2048,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,1536,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,1024,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,1024,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,1024,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,768,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,768,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,512,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,512,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,768,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,512,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,256,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,256,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,32,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,64,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,12288,0.06748799979686737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,16384,0.10681600123643875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,16384,0.0880960002541542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,16384,0.09907200187444687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,10240,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,12288,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,10240,0.06028800085186958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,12288,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,65536,0.31910398602485657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,8192,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,10240,0.06473600119352341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,8192,0.04838399961590767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,8192,0.05315199866890907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,7168,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,7168,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,6144,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,7168,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,6144,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,65536,0.31385600566864014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,6144,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,5120,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,4096,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,5120,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,4096,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,5120,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,4096,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,3584,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,3584,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,65536,0.3787519931793213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,3584,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,3072,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,3072,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,2560,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,2560,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,3072,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,2560,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,2048,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,1536,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,2048,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,1536,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,1024,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,1024,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,1536,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,768,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,512,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,768,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,64,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,12288,0.06809599697589874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,12288,0.0692799985408783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,16384,0.08745600283145905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,16384,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,16384,0.09935999661684036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,12288,0.07692799717187881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,10240,0.05843200162053108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,10240,0.060127999633550644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,8192,0.04899200052022934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,8192,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,10240,0.06505600363016129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,7168,0.043455999344587326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,8192,0.053119998425245285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,65536,0.29043200612068176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,7168,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,6144,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,7168,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,6144,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,6144,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,5120,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,65536,0.28275200724601746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,5120,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,4096,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,4096,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,5120,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,3584,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,4096,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,3584,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,3072,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,65536,0.38473600149154663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,3584,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,2560,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,2560,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,3072,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,2048,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,2560,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,2048,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,1536,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,2048,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,1536,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,1536,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,1024,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,768,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,1024,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,768,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,64,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,32,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,12288,0.08131200075149536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,16384,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,12288,0.0655680000782013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,16384,0.0799039974808693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,16384,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,12288,0.061824001371860504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,10240,0.06886400282382965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,10240,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,8192,0.04729599878191948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,10240,0.05257600173354149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,65536,0.24268800020217896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,65536,0.2438720017671585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,8192,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,7168,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,7168,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,8192,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,6144,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,6144,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,7168,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,5120,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,6144,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,5120,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,5120,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,4096,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,4096,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,3584,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,65536,0.32256001234054565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,4096,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,3072,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,3584,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,3072,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,3584,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,2560,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,2048,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,2560,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,1536,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,2560,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,2048,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,1536,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,2048,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,1536,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,1024,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,1024,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,1024,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,512,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,12288,0.08060800284147263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,12288,0.061216000467538834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,16384,0.07152000069618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,16384,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,16384,0.07807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,12288,0.061216000467538834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,10240,0.06860800087451935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,10240,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,10240,0.05222399905323982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,8192,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,8192,0.04992000013589859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,7168,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,8192,0.04310400038957596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,65536,0.2134079933166504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,65536,0.2086080014705658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,7168,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,7168,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,6144,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,6144,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,5120,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,6144,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,4096,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,5120,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,4096,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,3584,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,4096,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,3584,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,65536,0.2959679961204529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,3584,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,3072,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,2560,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,3072,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,3072,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,2560,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,5120,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,2048,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,2560,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,1536,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,2048,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,1536,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,1024,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,1536,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,768,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,256,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,256,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,64,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,12288,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,12288,0.06780800223350525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,16384,0.08742400258779526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,16384,0.0613120011985302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,16384,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,12288,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,10240,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,10240,0.04614400118589401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,10240,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,8192,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,8192,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,65536,0.20956799387931824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,65536,0.20163199305534363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,7168,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,8192,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,7168,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,6144,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,6144,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,7168,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,6144,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,5120,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,5120,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,4096,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,65536,0.20451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,3584,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,3584,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,4096,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,3072,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,3072,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,2560,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,2048,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,2560,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,2048,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,1536,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,1024,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,12288,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,16384,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,16384,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,16384,0.057023998349905014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,12288,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,32,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,12288,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,10240,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,65536,0.1424960047006607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,65536,0.15859200060367584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,8192,0.047520000487565994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,10240,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,8192,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,7168,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,7168,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,8192,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,6144,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,6144,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,5120,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,5120,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,4096,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,65536,0.20214399695396423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,4096,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,5120,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,3584,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,3584,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,3072,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,3072,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,2560,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,2560,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,2048,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,1536,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,1024,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,1024,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,1024,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,12288,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,12288,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,16384,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,16384,0.05344000086188316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,10240,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,12288,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,10240,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,8192,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,10240,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,7168,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,65536,0.12176000326871872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,8192,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,65536,0.11423999816179276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,7168,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,6144,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,6144,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,6144,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,5120,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,5120,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,65536,0.11817599833011627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,4096,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,4096,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,3584,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,3584,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,3072,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,3072,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,2560,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,3072,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,2048,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,2048,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,1536,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,768,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,2560,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,12288,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,12288,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,16384,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,16384,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,16384,0.05692800134420395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,12288,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,10240,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,10240,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,8192,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,10240,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,8192,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,8192,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,65536,0.09142400324344635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,7168,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,65536,0.09196799993515015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,7168,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,6144,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,6144,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,7168,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,6144,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,5120,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,5120,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,4096,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,65536,0.11760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,3584,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,4096,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,3584,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,3072,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,2560,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,2048,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,16384,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,16384,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,12288,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,16384,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,12288,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,10240,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,10240,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,10240,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,65536,0.08025600016117096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,12288,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,8192,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,8192,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,65536,0.07731200009584427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,7168,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,8192,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,7168,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,7168,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,6144,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,6144,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,65536,0.11593600362539291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,5120,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,5120,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,5120,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,4096,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,4096,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,3584,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,2560,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,2048,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,12288,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,12288,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,16384,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,16384,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,16384,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,12288,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,10240,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,10240,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,10240,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,8192,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,8192,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,7168,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,65536,0.07769600301980972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,65536,0.062111999839544296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,8192,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,6144,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,7168,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,6144,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,5120,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,4096,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,65536,0.11577600240707397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,5120,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,3584,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,3584,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,2048,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,3072,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,2560,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,2048,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,1024,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,768,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,12288,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,16384,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,12288,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,16384,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,16384,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,12288,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,10240,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,10240,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,8192,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,10240,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,8192,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,65536,0.0544000007212162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,7168,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,8192,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,7168,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,65536,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,7168,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,6144,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,5120,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,4096,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,65536,0.11737599968910217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,5120,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,3584,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,3072,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,2048,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,2560,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,2048,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,64,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,12288,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,12288,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,16384,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,16384,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,16384,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,12288,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,10240,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,10240,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,10240,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,8192,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,8192,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,65536,0.05411199852824211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,8192,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,7168,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,6144,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,65536,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,7168,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,6144,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,6144,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,65536,0.11747200042009354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,5120,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,5120,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,4096,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,5120,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,4096,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,7168,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,3072,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,3584,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,2560,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,2048,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,1536,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,2048,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,64,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,64,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,32,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,12288,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,12288,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,16384,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,16384,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,16384,0.05526399984955788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,12288,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,10240,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,10240,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,10240,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,8192,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,8192,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,7168,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,8192,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,65536,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,65536,0.052352000027894974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,6144,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,7168,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,7168,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,5120,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,6144,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,65536,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,4096,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,4096,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,3584,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,2560,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,3072,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,2560,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,2048,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,2048,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,64,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,64,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,12288,1.0054080486297607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,12288,1.7297919988632202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,10240,1.5480639934539795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,16384,0.7900159955024719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,10240,0.5150719881057739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,10240,0.7993280291557312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,7168,1.1323519945144653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,8192,1.208191990852356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,16384,1.3349759578704834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,12288,0.6123200058937073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,8192,1.2416960000991821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,6144,0.4758079946041107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,16384,1.2202880382537842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,8192,0.4156799912452698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,5120,0.40115201473236084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,7168,0.6407999992370605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,5120,0.8408640027046204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,4096,0.3296000063419342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,4096,0.3883199989795685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,6144,0.5544319748878479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,3584,0.3381440043449402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,7168,0.3717440068721771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,3072,0.2531520128250122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,6144,0.3253439962863922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,3584,0.2917439937591553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,5120,0.2752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,2560,0.24700799584388733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,4096,0.22515200078487396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,3072,0.5454080104827881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,3584,0.2661759853363037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,2560,0.2157759964466095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,2048,0.4017280042171478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,3072,0.23449599742889404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,1536,0.28806400299072266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,2048,0.3842880129814148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,2560,0.2024960070848465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,1536,0.2890239953994751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,1536,0.137472003698349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,2048,0.13212800025939941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,1024,0.19791999459266663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,768,0.13913600146770477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,1024,0.19327999651432037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,768,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,512,0.09702400118112564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,768,0.13843199610710144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,1024,0.08287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,512,0.09836799651384354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,256,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,128,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,256,0.0469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,512,0.07500799745321274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,128,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,256,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,64,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,64,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,128,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,32,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,32,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,64,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,32,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,12288,0.2761920094490051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,16384,0.35231998562812805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,12288,0.2377920001745224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,10240,0.20217600464820862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,16384,0.31404799222946167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,10240,0.20739200711250305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,16384,0.2708800137042999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,12288,0.20576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,8192,0.16630400717258453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,8192,0.164000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,65536,2.5064640045166016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,7168,0.14425599575042725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,10240,0.1753920018672943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,8192,0.14256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,7168,0.14876799285411835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,6144,0.12537600100040436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,6144,0.13663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,7168,0.12703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,5120,0.11046399921178818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,5120,0.107744000852108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,6144,0.11219199746847153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,4096,0.08873599767684937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,4096,0.0912960022687912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,5120,0.09375999867916107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,3584,0.08009599894285202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,4096,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,3072,0.1342719942331314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,3584,0.08383999764919281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,3072,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,65536,1.4776639938354492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,2560,0.11404799669981003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,2560,0.11753600090742111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,2048,0.09353599697351456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,3072,0.0639680027961731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,2048,0.100832000374794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,2560,0.05536000058054924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,1536,0.07526399940252304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,1536,0.0721919983625412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,2048,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,1536,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,1024,0.05119999870657921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,1024,0.049536000937223434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,3584,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,768,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,768,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,1024,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,512,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,512,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,768,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,512,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,256,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,256,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,256,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,128,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,128,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,65536,1.0415680408477783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,128,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,64,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,64,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,32,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,64,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,32,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,12288,0.23942400515079498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,16384,0.33632001280784607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,16384,0.28620800375938416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,12288,0.22947199642658234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,16384,0.2683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,10240,0.31244799494743347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,10240,0.2964800000190735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,12288,0.20556800067424774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,65536,1.1048959493637085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,8192,0.258976012468338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,10240,0.17561599612236023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,8192,0.25808000564575195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,7168,0.24115200340747833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,65536,1.6821759939193726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,65536,1.040671944618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,6144,0.19116799533367157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,7168,0.23071999847888947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,7168,0.1255040019750595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,8192,0.14217600226402283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,6144,0.20153599977493286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,5120,0.16467200219631195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,6144,0.11097600311040878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,4096,0.13625599443912506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,4096,0.13894400000572205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,5120,0.16857600212097168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,5120,0.09379199892282486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,4096,0.06585600227117538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,3584,0.12012799829244614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,3584,0.11830399930477142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,3072,0.10067199915647507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,2560,0.08713600039482117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,3072,0.10252799838781357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,2560,0.08607999980449677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,3584,0.0716480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,3072,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,2560,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,2048,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,2048,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,1536,0.05491200089454651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,1536,0.05619199946522713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,2048,0.07574400305747986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,1536,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,1024,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,1024,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,768,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,768,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,1024,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,512,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,768,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,512,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,256,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,256,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,512,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,128,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,256,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,128,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,64,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,64,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,32,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,32,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,16384,0.24383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,16384,0.27190399169921875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,12288,0.20153599977493286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,12288,0.14723199605941772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,10240,0.1770240068435669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,16384,0.2295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,10240,0.16096000373363495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,65536,1.1459200382232666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,12288,0.2059839963912964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,8192,0.14099200069904327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,10240,0.12812800705432892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,8192,0.13049599528312683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,65536,0.9655359983444214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,7168,0.11734399944543839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,8192,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,7168,0.11667200177907944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,6144,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,7168,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,6144,0.10553599894046783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,5120,0.09347199648618698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,5120,0.09251199662685394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,4096,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,6144,0.07859200239181519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,5120,0.06707199662923813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,4096,0.07747200131416321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,3584,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,3584,0.06672000139951706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,4096,0.05552000179886818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,3072,0.06348799914121628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,3584,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,2560,0.0764480009675026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,3072,0.091839998960495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,2560,0.07945600152015686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,65536,0.944927990436554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,3072,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,2560,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,2048,0.06361600011587143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,2048,0.06547199934720993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,1536,0.04966399818658829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,2048,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,1536,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,1024,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,1024,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,1536,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,1024,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,768,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,768,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,512,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,768,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,512,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,256,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,512,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,256,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,256,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,128,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,128,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,64,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,64,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,32,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,32,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,12288,0.12489599734544754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,12288,0.12291199713945389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,16384,0.32707199454307556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,16384,0.16035200655460358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,10240,0.10623999685049057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,10240,0.10662399977445602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,16384,0.1393599957227707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,12288,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,8192,0.0859839990735054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,8192,0.17084799706935883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,7168,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,10240,0.09017600119113922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,8192,0.07401599735021591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,65536,1.2247999906539917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,7168,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,6144,0.06739199906587601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,7168,0.06572800129652023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,6144,0.06992000341415405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,5120,0.11299200356006622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,5120,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,6144,0.058368001133203506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,65536,1.1590399742126465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,5120,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,4096,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,4096,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,3584,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,4096,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,3584,0.08166400343179703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,3072,0.06719999760389328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,3584,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,3072,0.06761600077152252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,2560,0.05923200026154518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,2560,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,2560,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,3072,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,2048,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,65536,0.5257920026779175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,2048,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,1536,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,2048,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,1536,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,1024,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,1024,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,768,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,1536,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,768,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,512,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,512,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,768,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,256,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,512,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,256,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,256,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,128,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,128,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,128,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,1024,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,64,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,32,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,32,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,12288,0.12323199957609177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,12288,0.13238400220870972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,16384,0.1648319959640503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,12288,0.10627199709415436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,16384,0.1610880047082901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,10240,0.1034879982471466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,16384,0.13929599523544312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,10240,0.10719999670982361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,8192,0.08739200234413147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,65536,0.7597119808197021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,8192,0.08556800335645676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,65536,0.6025279760360718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,10240,0.09068799763917923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,7168,0.0772159993648529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,8192,0.07398399710655212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,7168,0.07711999863386154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,65536,0.5244799852371216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,6144,0.0676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,7168,0.06585600227117538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,5120,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,6144,0.069023996591568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,5120,0.058720000088214874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,6144,0.058079998940229416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,4096,0.04918399825692177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,5120,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,4096,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,4096,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,3584,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,3584,0.0660799965262413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,3072,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,3072,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,3584,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,3072,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,2560,0.056223999708890915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,2560,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,2560,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,2048,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,2048,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,1536,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,1536,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,2048,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,1536,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,1024,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,1024,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,1024,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,768,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,768,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,512,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,512,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,768,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,512,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,256,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,256,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,128,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,256,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,128,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,128,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,32,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,32,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,64,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,32,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,12288,0.13760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,12288,0.09647999703884125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,16384,0.12012799829244614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,16384,0.17897599935531616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,16384,0.1383039951324463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,10240,0.08214399963617325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,10240,0.17401599884033203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,12288,0.10559999942779541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,10240,0.09001599997282028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,65536,0.6791999936103821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,8192,0.1409280002117157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,8192,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,7168,0.07795199751853943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,8192,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,6144,0.10598400235176086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,6144,0.06870400160551071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,7168,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,5120,0.09523200243711472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,5120,0.0589120015501976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,6144,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,4096,0.04851200059056282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,5120,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,65536,0.6755200028419495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,7168,0.0759039968252182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,4096,0.07513599842786789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,3584,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,3584,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,4096,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,65536,0.5231360197067261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,3072,0.06070400029420853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,3072,0.0647680014371872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,3072,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,3584,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,2560,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,2560,0.0506879985332489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,2560,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,2048,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,2048,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,2048,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,1536,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,1536,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,1024,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,1536,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,1024,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,768,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,1024,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,768,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,768,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,512,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,512,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,512,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,256,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,256,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,256,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,32,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,32,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,64,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,12288,0.12211199849843979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,16384,0.15929600596427917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,12288,0.0809599980711937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,16384,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,16384,0.11052799969911575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,10240,0.1034879982471466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,12288,0.08432000130414963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,10240,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,65536,0.6196799874305725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,65536,0.5843520164489746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,10240,0.07283200323581696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,8192,0.08582399785518646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,8192,0.13363200426101685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,7168,0.07689599692821503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,8192,0.059487998485565186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,7168,0.12278400361537933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,6144,0.059967998415231705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,6144,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,7168,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,6144,0.05344000086188316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,5120,0.08665599673986435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,5120,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,3584,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,4096,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,4096,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,5120,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,4096,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,3584,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,3584,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,3072,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,3072,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,65536,0.5237439870834351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,2560,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,2560,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,2560,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,3072,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,2048,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,2048,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,1536,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,1024,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,1536,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,2048,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,1536,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,1024,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,768,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,1024,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,768,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,768,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,512,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,512,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,256,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,128,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,32,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,12288,0.13023999333381653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,16384,0.16524800658226013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,16384,0.11446399986743927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,12288,0.081727996468544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,16384,0.09782399982213974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,10240,0.07683199644088745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,12288,0.07651200145483017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,10240,0.10710400342941284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,10240,0.06489600241184235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,8192,0.091839998960495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,8192,0.08540800213813782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,65536,0.3718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,65536,0.3747200071811676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,7168,0.08083199709653854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,8192,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,7168,0.0817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,6144,0.06982400268316269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,6144,0.07052800059318542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,5120,0.05692800134420395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,7168,0.04800000041723251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,5120,0.060416001826524734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,6144,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,4096,0.050624001771211624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,4096,0.05209600180387497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,5120,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,4096,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,3584,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,3584,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,3072,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,3584,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,3072,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,65536,0.36934399604797363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,3072,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,2560,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,2560,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,2048,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,1536,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,2048,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,2048,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,2560,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,1536,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,1536,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,1024,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,768,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,768,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,512,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,512,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,768,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,256,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,128,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,32,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,12288,0.10857599973678589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,12288,0.08505599945783615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,16384,0.09798400104045868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,16384,0.09689600020647049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,10240,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,12288,0.07596799731254578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,16384,0.09891200065612793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,10240,0.09852799773216248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,8192,0.09040000289678574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,10240,0.06534399837255478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,8192,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,8192,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,65536,0.33107200264930725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,7168,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,7168,0.05430399999022484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,7168,0.07599999755620956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,6144,0.06752000004053116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,65536,0.31094399094581604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,6144,0.06860800087451935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,5120,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,6144,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,4096,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,5120,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,4096,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,5120,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,65536,0.36976000666618347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,4096,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,3584,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,3072,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,3072,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,3072,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,3584,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,2560,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,2560,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,2048,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,2560,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,3584,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,2048,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,1536,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,2048,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,1536,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,1024,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,1536,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,768,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,1024,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,1024,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,768,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,768,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,512,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,256,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,512,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,64,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,32,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,12288,0.08137600123882294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,16384,0.10400000214576721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,16384,0.08534400165081024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,12288,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,16384,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,10240,0.06828799843788147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,12288,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,10240,0.0589120015501976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,10240,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,8192,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,8192,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,7168,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,8192,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,65536,0.3894079923629761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,65536,0.2975040078163147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,7168,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,6144,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,65536,0.28115200996398926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,6144,0.061344001442193985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,5120,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,7168,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,5120,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,6144,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,5120,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,4096,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,4096,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,3584,0.038943998515605927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,4096,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,3584,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,3072,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,3584,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,3072,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,3072,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,2560,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,2560,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,2048,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,2048,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,2560,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,2048,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,1536,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,1536,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,1536,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,1024,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,768,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,1024,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,1024,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,768,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,768,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,32,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,12288,0.06431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,16384,0.07321599870920181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,12288,0.06169600039720535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,16384,0.07430399954319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,16384,0.05801599845290184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,12288,0.05040000006556511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,10240,0.06598400324583054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,10240,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,8192,0.07391999661922455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,8192,0.04931199923157692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,65536,0.18572799861431122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,65536,0.21609599888324738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,10240,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,7168,0.06611199676990509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,8192,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,7168,0.06403200328350067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,6144,0.05878400057554245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,7168,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,6144,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,6144,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,5120,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,5120,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,4096,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,5120,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,4096,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,3584,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,4096,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,3584,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,3584,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,3072,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,3072,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,2560,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,2560,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,2048,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,2560,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,2048,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,1536,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,1536,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,1024,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,65536,0.20950399339199066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,1024,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,12288,0.08633600175380707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,12288,0.06169600039720535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,16384,0.05337600037455559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,16384,0.07161600142717361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,16384,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,10240,0.06716799736022949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,12288,0.04460800066590309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,10240,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,8192,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,10240,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,65536,0.20681600272655487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,8192,0.05641600117087364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,8192,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,7168,0.046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,7168,0.04662400111556053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,65536,0.17795200645923615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,7168,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,6144,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,6144,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,5120,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,5120,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,6144,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,4096,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,4096,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,65536,0.20319999754428864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,5120,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,3584,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,3584,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,2560,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,3072,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,2560,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,2048,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,1536,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,1024,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,1024,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,2048,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,12288,0.050624001771211624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,12288,0.07072000205516815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,16384,0.06070400029420853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,16384,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,16384,0.056832000613212585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,12288,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,10240,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,10240,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,10240,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,8192,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,65536,0.16991999745368958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,8192,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,7168,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,65536,0.190528005361557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,7168,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,6144,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,6144,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,7168,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,5120,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,6144,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,5120,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,5120,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,4096,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,4096,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,3584,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,3584,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,65536,0.20444799959659576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,3072,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,2048,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,2560,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,1536,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,1536,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,1024,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,1024,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,768,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,768,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,12288,0.0424639992415905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,12288,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,16384,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,16384,0.042688000947237015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,16384,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,12288,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,10240,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,8192,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,10240,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,8192,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,65536,0.11856000125408173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,65536,0.09404800087213516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,8192,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,7168,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,65536,0.09359999746084213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,7168,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,6144,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,6144,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,10240,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,5120,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,5120,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,4096,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,5120,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,4096,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,3072,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,2560,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,1536,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,1536,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,1024,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,768,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,12288,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,12288,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,16384,0.0416640006005764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,16384,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,16384,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,12288,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,10240,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,10240,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,8192,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,10240,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,8192,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,65536,0.1111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,8192,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,7168,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,65536,0.08108799904584885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,7168,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,6144,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,7168,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,6144,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,5120,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,5120,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,65536,0.11715199798345566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,4096,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,4096,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,3072,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,2048,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,12288,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,12288,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,16384,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,16384,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,12288,0.043776001781225204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,10240,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,10240,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,8192,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,8192,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,65536,0.06806399673223495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,7168,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,65536,0.07865600287914276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,7168,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,6144,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,7168,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,6144,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,6144,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,5120,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,5120,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,65536,0.11747200042009354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,3584,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,4096,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,3072,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,2560,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,2048,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,1536,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,12288,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,12288,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,16384,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,16384,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,16384,0.05552000179886818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,12288,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,10240,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,10240,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,10240,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,8192,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,8192,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,65536,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,65536,0.0642239972949028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,7168,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,7168,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,8192,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,6144,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,7168,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,6144,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,5120,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,5120,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,6144,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,65536,0.11654400080442429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,3584,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,3072,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,3072,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,5120,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,2048,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,2560,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,1536,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,64,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,12288,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,12288,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,16384,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,16384,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,16384,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,10240,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,12288,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,8192,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,10240,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,65536,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,65536,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,8192,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,7168,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,8192,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,6144,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,6144,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,7168,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,5120,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,6144,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,5120,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,65536,0.11699199676513672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,5120,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,4096,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,3584,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,3584,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,3072,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,3072,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,2048,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,2560,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,1536,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,2048,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,1024,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,768,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,64,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,12288,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,12288,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,16384,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,16384,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,16384,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,12288,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,10240,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,8192,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,8192,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,8192,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,7168,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,7168,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,65536,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,7168,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,6144,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,6144,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,5120,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,6144,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,65536,0.046560000628232956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,65536,0.11753600090742111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,5120,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,4096,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,4096,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,3584,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,4096,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,3584,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,3072,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,2560,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,1536,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,1536,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,768,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,64,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,12288,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,12288,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,16384,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,16384,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,12288,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,16384,0.05161599814891815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,10240,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,8192,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,10240,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,8192,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,10240,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,65536,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,65536,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,8192,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,7168,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,7168,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,6144,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,6144,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,7168,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,6144,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,65536,0.1117120012640953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,5120,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,4096,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,3584,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,3072,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,2560,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,2560,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,2048,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,1536,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,1536,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,64,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,64,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,10240,0.39923200011253357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,16384,0.6586560010910034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,10240,0.390720009803772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,16384,0.616320013999939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,12288,0.4696959853172302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,8192,0.323743999004364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,8192,0.3169279992580414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,12288,0.4678080081939697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,12288,0.40857601165771484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,7168,0.2825919985771179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,7168,0.2879680097103119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,10240,0.3447679877281189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,6144,0.24297599494457245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,16384,0.5407360196113586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,8192,0.28143998980522156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,5120,0.2080959975719452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,6144,0.24662399291992188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,7168,0.2516160011291504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,4096,0.16819199919700623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,4096,0.1722559928894043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,6144,0.21663999557495117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,5120,0.20899200439453125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,3584,0.15292799472808838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,3584,0.1499200016260147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,5120,0.1860480010509491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,4096,0.15430399775505066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,2560,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,3584,0.13859200477600098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,3072,0.1313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,3072,0.13222399353981018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,2560,0.1133119985461235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,2048,0.09436800330877304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,3072,0.12198399752378464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,2048,0.0920960009098053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,1536,0.07267200201749802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,2560,0.10566399991512299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,2048,0.09062399715185165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,1024,0.0530879981815815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,1536,0.07465600222349167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,1024,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,768,0.043616000562906265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,1536,0.07440000027418137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,768,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,1024,0.058111999183893204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,512,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,512,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,768,0.04927999898791313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,512,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,256,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,256,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,128,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,128,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,256,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,64,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,128,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,32,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,64,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,32,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,64,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,32,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,12288,0.1228799968957901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,16384,0.15984000265598297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,16384,0.16300800442695618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,16384,0.1401280015707016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,12288,0.10787200182676315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,10240,0.11132799834012985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,10240,0.10553599894046783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,12288,0.13222399353981018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,65536,0.657151997089386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,8192,0.08643200248479843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,10240,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,8192,0.0902400016784668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,7168,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,7168,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,65536,0.6484479904174805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,8192,0.07385600358247757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,6144,0.06803199648857117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,6144,0.06806399673223495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,7168,0.06758400052785873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,5120,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,5120,0.05951999872922897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,6144,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,4096,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,4096,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,5120,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,3584,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,4096,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,3584,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,3072,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,3072,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,3584,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,3072,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,2560,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,2560,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,2560,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,2048,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,2048,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,1536,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,1536,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,2048,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,1536,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,1024,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,1024,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,65536,0.5244479775428772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,768,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,768,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,1024,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,512,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,768,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,512,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,512,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,256,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,256,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,256,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,128,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,64,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,32,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,32,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,12288,0.1188800036907196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,12288,0.12492799758911133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,16384,0.1568640023469925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,16384,0.15187199413776398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,16384,0.14000000059604645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,10240,0.10492800176143646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,12288,0.1056319996714592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,10240,0.10179200023412704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,8192,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,8192,0.0857279971241951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,65536,0.5647040009498596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,8192,0.07366400212049484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,7168,0.07692799717187881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,10240,0.09043200314044952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,7168,0.07827199995517731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,65536,0.5522879958152771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,6144,0.06921599805355072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,6144,0.06739199906587601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,7168,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,5120,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,6144,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,5120,0.0589120015501976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,4096,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,4096,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,5120,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,3584,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,4096,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,3584,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,3072,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,3584,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,3072,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,3072,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,2560,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,2560,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,2048,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,2048,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,2560,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,2048,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,1536,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,1024,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,65536,0.5244799852371216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,1024,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,1536,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,1024,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,768,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,768,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,768,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,512,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,512,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,256,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,64,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,32,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,32,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,16384,0.13308799266815186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,16384,0.12812800705432892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,12288,0.10198400169610977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,12288,0.12460800260305405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,16384,0.13212800025939941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,10240,0.10761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,12288,0.09958399832248688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,10240,0.08668799698352814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,8192,0.07353600114583969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,65536,0.6234880089759827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,8192,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,10240,0.08416000008583069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,7168,0.06604799628257751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,8192,0.06982400268316269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,65536,0.46303999423980713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,7168,0.06755200028419495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,6144,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,6144,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,6144,0.054496001452207565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,5120,0.06070400029420853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,7168,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,5120,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,5120,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,4096,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,4096,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,3584,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,4096,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,3584,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,3584,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,3072,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,3072,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,2560,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,3072,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,2560,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,2048,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,65536,0.5241919755935669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,2560,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,2048,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,1536,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,1536,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,2048,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,1024,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,1536,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,768,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,1024,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,1024,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,768,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,768,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,512,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,256,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,32,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,32,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,64,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,32,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,12288,0.07043199986219406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,12288,0.07657600194215775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,16384,0.09484799951314926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,16384,0.09142400324344635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,16384,0.10870400071144104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,10240,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,12288,0.07744000107049942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,10240,0.06038400158286095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,10240,0.06761600077152252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,65536,0.31619200110435486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,7168,0.043776001781225204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,8192,0.07168000191450119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,8192,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,8192,0.05913599953055382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,65536,0.31327998638153076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,7168,0.06345599889755249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,6144,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,6144,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,7168,0.05027199909090996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,6144,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,5120,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,4096,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,4096,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,5120,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,4096,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,3584,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,3584,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,3584,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,3072,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,3072,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,3072,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,65536,0.4548799991607666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,2560,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,2560,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,5120,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,2560,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,2048,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,2048,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,1536,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,1024,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,1536,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,1536,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,1024,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,768,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,512,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,32,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,12288,0.08214399963617325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,12288,0.06838399916887283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,16384,0.08876799792051315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,16384,0.08748800307512283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,16384,0.09955199807882309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,10240,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,10240,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,12288,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,10240,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,8192,0.07132799923419952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,8192,0.04854400083422661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,65536,0.28281599283218384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,8192,0.054016001522541046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,7168,0.043776001781225204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,7168,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,6144,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,6144,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,7168,0.050175998359918594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,5120,0.04819199815392494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,6144,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,5120,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,4096,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,4096,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,5120,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,65536,0.2775680124759674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,3584,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,3584,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,4096,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,65536,0.39692801237106323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,3584,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,3072,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,3072,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,2560,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,3072,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,2560,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,2048,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,2048,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,2560,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,1536,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,2048,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,1536,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,1536,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,1024,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,1024,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,768,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,768,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,512,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,64,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,12288,0.08089599758386612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,16384,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,16384,0.08064000308513641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,12288,0.08086399734020233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,32,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,16384,0.08287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,12288,0.06124800071120262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,10240,0.069023996591568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,10240,0.06921599805355072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,8192,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,65536,0.24726399779319763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,10240,0.052480001002550125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,8192,0.05737600103020668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,7168,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,65536,0.37727999687194824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,8192,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,6144,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,7168,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,7168,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,6144,0.05552000179886818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,5120,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,6144,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,5120,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,4096,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,4096,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,5120,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,4096,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,3584,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,3584,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,65536,0.3171840012073517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,3072,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,3072,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,3584,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,2560,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,3072,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,2560,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,2560,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,2048,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,2048,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,1536,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,1536,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,2048,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,1024,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,1536,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,1024,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,768,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,512,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,32,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,12288,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,12288,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,16384,0.10326399654150009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,16384,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,16384,0.07929600030183792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,12288,0.0615679994225502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,10240,0.0692799985408783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,10240,0.06940799951553345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,10240,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,8192,0.04956800118088722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,8192,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,65536,0.21270400285720825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,7168,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,65536,0.2072959989309311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,8192,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,7168,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,6144,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,6144,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,7168,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,5120,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,5120,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,6144,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,5120,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,4096,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,4096,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,3584,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,4096,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,3584,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,3584,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,3072,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,3072,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,65536,0.2977280020713806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,2560,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,2560,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,2048,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,2560,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,2048,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,1536,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,2048,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,1536,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,1024,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,1536,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,768,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,768,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,512,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,3072,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,64,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,32,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,12288,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,12288,0.06678400188684464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,16384,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,16384,0.06054399907588959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,16384,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,12288,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,10240,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,10240,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,10240,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,8192,0.038943998515605927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,8192,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,7168,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,7168,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,8192,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,65536,0.17628799378871918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,7168,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,6144,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,65536,0.17919999361038208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,6144,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,5120,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,5120,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,6144,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,4096,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,65536,0.21807999908924103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,5120,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,3584,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,3584,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,3072,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,3072,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,2560,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,2560,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,2048,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,1536,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,1536,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,1024,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,1024,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,768,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,12288,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,16384,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,16384,0.06428799778223038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,16384,0.057023998349905014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,12288,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,10240,0.05724800005555153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,10240,0.04358400031924248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,65536,0.16035200655460358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,12288,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,8192,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,8192,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,10240,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,65536,0.17830400168895721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,8192,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,7168,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,7168,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,6144,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,6144,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,7168,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,5120,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,6144,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,65536,0.20396800339221954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,5120,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,4096,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,4096,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,4096,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,3584,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,3584,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,3072,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,3072,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,2560,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,2560,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,2048,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,1024,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,768,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,12288,0.07068800181150436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,12288,0.055456001311540604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,16384,0.05222399905323982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,16384,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,12288,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,16384,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,10240,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,10240,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,10240,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,8192,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,8192,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,65536,0.14207999408245087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,8192,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,7168,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,7168,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,65536,0.14643199741840363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,6144,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,6144,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,7168,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,5120,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,5120,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,6144,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,65536,0.2025279998779297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,5120,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,4096,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,4096,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,3584,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,4096,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,3584,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,3584,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,3072,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,2560,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,2560,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,2560,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,2048,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,1024,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,768,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,1024,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,12288,0.056223999708890915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,16384,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,12288,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,16384,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,16384,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,12288,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,10240,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,10240,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,10240,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,8192,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,8192,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,7168,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,65536,0.13129599392414093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,8192,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,7168,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,65536,0.12886400520801544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,6144,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,6144,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,5120,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,5120,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,4096,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,5120,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,3584,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,4096,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,65536,0.20454399287700653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,3584,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,3072,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,3072,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,2560,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,1536,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,2048,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,1536,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,1024,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,768,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,32,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,12288,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,12288,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,16384,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,16384,0.043487999588251114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,16384,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,12288,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,10240,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,10240,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,10240,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,8192,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,65536,0.11369600147008896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,65536,0.11430399864912033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,8192,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,7168,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,6144,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,7168,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,6144,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,6144,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,5120,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,5120,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,65536,0.12009599804878235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,4096,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,7168,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,3584,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,4096,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,3584,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,3584,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,2560,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,2048,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,1024,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,512,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,32,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,12288,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,12288,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,16384,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,16384,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,16384,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,10240,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,10240,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,12288,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,10240,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,8192,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,65536,0.10390400141477585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,8192,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,65536,0.09324800223112106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,7168,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,6144,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,6144,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,7168,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,6144,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,5120,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,4096,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,4096,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,65536,0.11830399930477142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,3584,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,3584,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,3072,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,3584,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,3072,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,2048,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,1536,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,1536,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,32,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,16384,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,12288,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,16384,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,16384,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,12288,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,12288,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,10240,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,10240,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,65536,0.07785599678754807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,8192,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,10240,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,65536,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,8192,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,7168,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,6144,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,7168,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,7168,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,5120,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,6144,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,5120,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,65536,0.11785600334405899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,6144,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,4096,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,5120,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,3584,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,3072,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,3072,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,2048,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,1536,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,1536,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,12288,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,12288,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,16384,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,16384,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,16384,0.05526399984955788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,12288,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,10240,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,10240,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,10240,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,8192,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,65536,0.06703999638557434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,8192,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,7168,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,8192,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,65536,0.06831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,7168,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,6144,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,7168,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,6144,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,5120,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,65536,0.1170239970088005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,4096,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,3584,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,2560,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,2048,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,12288,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,12288,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,16384,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,16384,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,16384,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,12288,0.043487999588251114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,10240,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,10240,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,8192,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,10240,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,8192,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,65536,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,8192,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,65536,0.0804160013794899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,7168,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,7168,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,6144,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,6144,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,7168,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,5120,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,6144,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,5120,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,65536,0.11692799627780914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,3584,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,4096,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,3584,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,2560,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,2560,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,2048,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,64,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,12288,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,12288,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,16384,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,16384,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,12288,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,10240,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,10240,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,10240,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,8192,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,65536,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,8192,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,65536,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,16384,0.05532800033688545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,7168,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,7168,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,8192,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,6144,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,7168,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,65536,0.11648000031709671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,6144,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,5120,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,6144,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,5120,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,4096,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,3584,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,3584,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,3072,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,2560,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,2048,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,1536,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,12288,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,16384,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,16384,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,12288,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,16384,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,12288,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,65536,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,10240,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,65536,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,10240,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,10240,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,8192,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,8192,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,65536,0.21104000508785248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,7168,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,7168,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,7168,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,6144,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,6144,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,6144,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,5120,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,4096,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,4096,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,3584,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,3584,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,3072,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,3072,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,2560,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,2560,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,1536,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,2048,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,1536,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,1024,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,16384,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,16384,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,65536,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,12288,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,16384,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,12288,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,65536,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,12288,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,10240,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,10240,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,7168,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,65536,0.11609599739313126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,10240,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,8192,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,8192,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,8192,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,7168,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,7168,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,6144,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,6144,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,6144,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,5120,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,3584,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,4096,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,3584,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,3072,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,2560,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,2560,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,2048,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,1536,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,16384,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,16384,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,12288,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,65536,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,16384,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,12288,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,65536,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,12288,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,10240,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,65536,0.20396800339221954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,10240,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,8192,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,8192,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,8192,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,6144,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,7168,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,10240,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,7168,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,7168,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,6144,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,5120,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,5120,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,4096,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,3584,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,3584,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,3072,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,2560,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,2560,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,2048,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,1536,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,2048,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,512,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,64,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,12288,0.5553280115127563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,10240,0.42924800515174866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,16384,1.18886399269104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,10240,0.4403519928455353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,8192,0.3296000063419342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,12288,0.9666879773139954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,16384,0.7002559900283813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,12288,0.4078719913959503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,10240,0.3457919955253601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,7168,0.28569599986076355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,8192,0.3233279883861542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,16384,0.5324159860610962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,8192,0.28431999683380127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,6144,0.2428479939699173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,7168,0.34006398916244507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,5120,0.20947200059890747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,7168,0.24748800694942474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,6144,0.2504960000514984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,5120,0.20880000293254852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,4096,0.1730239987373352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,4096,0.18915200233459473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,6144,0.21753600239753723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,3584,0.15078400075435638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,5120,0.18611200153827667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,3584,0.1536639928817749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,3072,0.24553599953651428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,4096,0.15164799988269806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,3072,0.13353599607944489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,3584,0.1358720064163208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,3072,0.12070400267839432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,2560,0.11420799791812897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,2560,0.19728000462055206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,2560,0.10518400371074677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,2048,0.16991999745368958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,2048,0.16966399550437927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,1536,0.13603200018405914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,1024,0.08857599645853043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,1536,0.12863999605178833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,1024,0.08694399893283844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,2048,0.08883199840784073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,1536,0.07222399860620499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,768,0.06831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,768,0.06496000289916992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,1024,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,512,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,512,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,768,0.04899200052022934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,512,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,256,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,256,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,128,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,256,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,128,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,64,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,64,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,32,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,64,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,32,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,32,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,128,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,12288,0.12355200201272964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,16384,0.3065600097179413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,12288,0.12572799623012543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,16384,0.18268799781799316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,16384,0.13891200721263885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,12288,0.10860799998044968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,10240,0.10515200346708298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,10240,0.19334399700164795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,8192,0.08646400272846222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,8192,0.15343999862670898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,10240,0.09065599739551544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,7168,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,7168,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,8192,0.07497599720954895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,6144,0.07203199714422226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,6144,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,7168,0.06831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,5120,0.0594559982419014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,5120,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,6144,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,4096,0.08054400235414505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,65536,0.7005760073661804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,4096,0.04950400069355965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,5120,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,4096,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,3584,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,3584,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,3072,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,3584,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,3072,0.06207999959588051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,65536,0.7197120189666748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,2560,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,2560,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,65536,0.5209280252456665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,3072,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,2048,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,2048,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,2560,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,2048,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,1536,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,1536,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,1536,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,1024,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,1024,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,768,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,1024,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,768,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,768,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,512,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,512,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,256,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,512,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,256,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,128,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,128,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,256,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,128,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,64,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,64,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,64,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,32,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,32,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,32,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,12288,0.13964800536632538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,12288,0.1231359988451004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,16384,0.17075200378894806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,16384,0.16102400422096252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,16384,0.1382720023393631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,10240,0.10735999792814255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,12288,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,8192,0.09651199728250504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,10240,0.11151999980211258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,10240,0.09052799642086029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,8192,0.13756799697875977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,65536,0.6176959872245789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,7168,0.07744000107049942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,7168,0.12272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,8192,0.07519999891519547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,6144,0.06719999760389328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,7168,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,65536,0.5763520002365112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,6144,0.09718400239944458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,5120,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,5120,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,6144,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,5120,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,4096,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,4096,0.0756480023264885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,3584,0.06665600091218948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,3584,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,4096,0.04233599826693535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,3072,0.055424001067876816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,3072,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,2560,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,3072,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,2560,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,2560,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,2048,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,2048,0.04169600084424019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,2048,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,65536,0.5212159752845764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,1536,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,1536,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,3584,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,1024,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,1536,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,1024,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,768,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,768,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,1024,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,768,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,512,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,512,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,512,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,256,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,256,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,128,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,128,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,64,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,64,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,64,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,32,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,32,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,12288,0.11715199798345566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,12288,0.10732799768447876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,16384,0.1425279974937439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,16384,0.14000000059604645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,10240,0.08739200234413147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,12288,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,10240,0.10655999928712845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,16384,0.13023999333381653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,8192,0.07785599678754807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,10240,0.07462400197982788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,7168,0.07132799923419952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,8192,0.07459200173616409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,65536,0.625216007232666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,8192,0.0700799971818924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,7168,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,6144,0.0984639972448349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,6144,0.06163199990987778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,5120,0.05459199845790863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,6144,0.05567999929189682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,7168,0.05523199960589409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,5120,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,5120,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,4096,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,4096,0.04662400111556053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,3584,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,3584,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,4096,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,3072,0.051263999193906784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,3584,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,65536,0.48339200019836426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,3072,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,3072,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,2560,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,65536,0.5214719772338867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,2560,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,2048,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,2048,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,1536,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,2560,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,1536,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,2048,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,1024,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,1536,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,1024,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,768,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,1024,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,768,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,512,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,512,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,768,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,512,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,256,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,256,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,128,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,128,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,64,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,32,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,32,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,32,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,12288,0.08851200342178345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,12288,0.11209599673748016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,16384,0.1526080071926117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,16384,0.10598400235176086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,10240,0.09337600320577621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,10240,0.10047999769449234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,12288,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,10240,0.06515199691057205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,8192,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,8192,0.07699199765920639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,65536,0.3205440044403076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,7168,0.07391999661922455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,8192,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,65536,0.36236798763275146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,16384,0.10716799646615982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,7168,0.07276800274848938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,65536,0.38889598846435547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,6144,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,6144,0.06924799829721451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,5120,0.05299200117588043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,7168,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,6144,0.042399998754262924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,5120,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,4096,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,5120,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,4096,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,4096,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,3584,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,3584,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,3072,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,3584,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,3072,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,3072,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,2560,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,2560,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,2048,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,2560,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,1536,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,2048,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,2048,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,1536,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,1024,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,1536,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,1024,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,1024,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,768,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,768,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,512,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,512,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,768,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,256,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,128,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,32,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,32,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,16384,0.09987200051546097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,12288,0.11075200140476227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,12288,0.10886400192975998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,16384,0.10060799866914749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,16384,0.09759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,12288,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,10240,0.09232000261545181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,10240,0.0907839983701706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,8192,0.07286400347948074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,10240,0.06518399715423584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,8192,0.07817599922418594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,7168,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,7168,0.06943999975919724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,8192,0.05452800169587135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,7168,0.04745600000023842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,65536,0.29760000109672546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,6144,0.06108799949288368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,5120,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,6144,0.0605119988322258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,5120,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,6144,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,65536,0.276095986366272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,4096,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,4096,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,5120,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,65536,0.3786559998989105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,3584,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,3584,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,4096,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,3584,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,2560,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,2560,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,3072,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,3072,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,3072,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,2560,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,2048,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,1536,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,1536,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,2048,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,1024,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,1536,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,1024,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,1024,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,768,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,512,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,768,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,768,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,512,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,64,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,2048,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,64,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,32,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,32,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,12288,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,12288,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,16384,0.10489600151777267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,16384,0.10726399719715118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,16384,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,12288,0.06348799914121628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,10240,0.0695360004901886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,10240,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,10240,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,8192,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,8192,0.07305599749088287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,7168,0.0748480036854744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,7168,0.05539200082421303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,8192,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,65536,0.377375990152359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,7168,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,6144,0.059647999703884125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,6144,0.055424001067876816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,5120,0.044704001396894455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,65536,0.2709760069847107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,5120,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,6144,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,4096,0.043487999588251114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,5120,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,4096,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,4096,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,65536,0.29625600576400757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,3584,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,3584,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,3584,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,3072,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,3072,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,3072,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,2560,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,2560,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,2048,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,2048,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,2560,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,1536,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,2048,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,1536,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,1024,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,1536,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,1024,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,1024,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,768,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,768,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,512,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,256,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,256,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,128,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,12288,0.07939200103282928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,12288,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,16384,0.07315199822187424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,16384,0.07273600250482559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,16384,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,12288,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,10240,0.07001599669456482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,8192,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,8192,0.056223999708890915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,10240,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,65536,0.37641599774360657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,7168,0.06038400158286095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,7168,0.057151999324560165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,8192,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,65536,0.22310400009155273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,10240,0.0551999993622303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,6144,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,7168,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,6144,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,5120,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,5120,0.05110400170087814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,6144,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,65536,0.2152319997549057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,4096,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,4096,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,3584,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,5120,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,3584,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,4096,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,3072,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,3072,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,3584,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,2560,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,2560,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,2048,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,2048,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,1536,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,1536,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,1024,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,1024,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,768,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,768,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,256,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,64,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,12288,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,12288,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,16384,0.06313599646091461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,16384,0.06508799642324448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,16384,0.05619199946522713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,10240,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,12288,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,10240,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,8192,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,10240,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,7168,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,8192,0.04806400090456009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,8192,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,65536,0.18371200561523438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,65536,0.20604799687862396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,7168,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,6144,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,7168,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,6144,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,5120,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,5120,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,6144,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,5120,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,4096,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,4096,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,3584,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,3584,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,65536,0.20524799823760986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,2560,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,3072,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,2048,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,2560,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,2048,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,1536,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,1536,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,1024,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,1024,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,768,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,256,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,12288,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,128,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,16384,0.059967998415231705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,16384,0.059776000678539276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,12288,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,16384,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,12288,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,10240,0.05993599817156792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,10240,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,8192,0.050335999578237534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,10240,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,65536,0.16944000124931335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,65536,0.17980800569057465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,8192,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,7168,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,8192,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,7168,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,6144,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,6144,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,7168,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,5120,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,5120,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,5120,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,6144,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,4096,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,4096,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,65536,0.20403200387954712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,3584,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,3072,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,3584,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,3072,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,2560,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,2560,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,2048,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,1536,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,1536,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,1024,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,768,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,12288,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,12288,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,16384,0.053599998354911804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,16384,0.0551999993622303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,16384,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,12288,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,10240,0.056832000613212585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,10240,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,10240,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,8192,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,8192,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,7168,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,65536,0.16687999665737152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,8192,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,65536,0.1571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,7168,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,7168,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,6144,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,6144,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,5120,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,4096,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,6144,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,5120,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,5120,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,4096,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,65536,0.2046079933643341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,3584,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,3584,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,3072,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,3072,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,2560,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,2560,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,2048,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,1536,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,2048,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,1536,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,1024,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,1024,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,768,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,1024,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,512,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,768,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,256,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,12288,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,12288,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,16384,0.06111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,16384,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,16384,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,10240,0.05427199974656105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,12288,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,10240,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,10240,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,8192,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,8192,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,8192,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,65536,0.14563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,7168,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,65536,0.14083200693130493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,7168,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,6144,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,7168,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,5120,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,6144,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,5120,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,6144,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,4096,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,5120,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,3584,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,4096,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,3584,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,65536,0.12806400656700134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,4096,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,3072,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,3072,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,2560,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,2560,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,2048,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,2048,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,3072,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,1536,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,1536,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,2048,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,1024,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,1536,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,1024,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,1024,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,512,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,256,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,12288,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,12288,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,16384,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,16384,0.044319998472929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,16384,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,12288,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,10240,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,10240,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,10240,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,8192,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,8192,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,65536,0.11318399757146835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,8192,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,7168,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,7168,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,6144,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,7168,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,6144,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,5120,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,6144,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,5120,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,4096,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,5120,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,65536,0.12009599804878235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,65536,0.11584000289440155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,3584,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,4096,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,3584,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,3072,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,3072,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,3072,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,2560,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,2560,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,2048,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,2560,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,1536,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,1536,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,1536,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,1024,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,768,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,12288,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,12288,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,16384,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,16384,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,16384,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,12288,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,10240,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,10240,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,8192,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,10240,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,8192,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,65536,0.09359999746084213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,8192,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,7168,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,65536,0.0939520001411438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,7168,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,7168,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,6144,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,5120,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,6144,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,5120,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,5120,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,4096,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,65536,0.11798399686813354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,4096,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,4096,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,3584,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,3584,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,3072,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,3584,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,2560,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,2048,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,1536,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,1024,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,12288,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,16384,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,16384,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,12288,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,16384,0.035392001271247864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,12288,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,10240,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,10240,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,10240,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,8192,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,65536,0.07865600287914276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,8192,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,8192,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,7168,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,65536,0.07913599908351898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,7168,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,6144,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,7168,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,6144,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,6144,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,5120,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,5120,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,3584,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,65536,0.1178240031003952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,4096,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,2560,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,2048,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,1536,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,1024,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,16384,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,12288,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,16384,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,16384,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,12288,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,10240,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,10240,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,12288,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,8192,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,65536,0.06828799843788147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,65536,0.06864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,8192,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,8192,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,7168,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,7168,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,6144,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,7168,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,6144,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,5120,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,65536,0.11734399944543839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,5120,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,5120,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,4096,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,3584,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,3072,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,2560,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,2048,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,1536,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,12288,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,12288,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,16384,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,16384,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,16384,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,12288,0.04531199857592583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,10240,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,10240,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,8192,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,10240,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,65536,0.055135998874902725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,65536,0.07545600086450577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,8192,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,65536,0.11667200177907944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,7168,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,7168,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,6144,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,7168,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,5120,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,6144,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,5120,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,5120,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,4096,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,3072,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,2560,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,2048,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,1536,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,12288,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,12288,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,16384,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,16384,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,16384,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,12288,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,10240,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,8192,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,10240,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,10240,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,8192,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,65536,0.05510399863123894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,65536,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,7168,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,7168,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,6144,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,6144,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,7168,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,6144,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,5120,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,4096,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,4096,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,65536,0.11727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,4096,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,3584,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,3584,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,3072,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,2560,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,2048,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,1536,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,1024,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,32,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,12288,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,12288,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,16384,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,16384,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,16384,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,12288,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,10240,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,10240,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,10240,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,8192,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,8192,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,65536,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,65536,0.04668800160288811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,8192,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,6144,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,6144,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,7168,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,6144,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,65536,0.11708799749612808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,5120,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,3584,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,3584,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,3072,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,5120,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,2048,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,2048,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,1536,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,1024,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,128,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,12288,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,12288,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,16384,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,16384,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,16384,0.055424001067876816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,12288,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,10240,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,10240,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,8192,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,8192,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,65536,0.046592000871896744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,65536,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,8192,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,7168,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,7168,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,7168,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,6144,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,6144,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,5120,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,65536,0.21209600567817688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,5120,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,3072,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,3584,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,2560,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,2048,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,768,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,32,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,12288,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,16384,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,16384,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,12288,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,16384,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,12288,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,10240,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,10240,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,8192,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,65536,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,65536,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,8192,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,10240,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,8192,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,7168,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,6144,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,6144,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,7168,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,65536,0.20112000405788422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,6144,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,5120,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,5120,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,4096,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,3584,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,3072,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,2560,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,2048,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,1536,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,512,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,256,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,128,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,32,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,12288,1.0237760543823242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,10240,0.5305280089378357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,16384,0.840831995010376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,12288,1.1073919534683228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,12288,0.40828800201416016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,10240,0.5645440220832825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,16384,1.4291839599609375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,7168,0.3702720105648041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,8192,0.3561280071735382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,7168,0.3073920011520386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,8192,0.2807680070400238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,8192,0.43196800351142883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,6144,0.3277760148048401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,6144,0.33852800726890564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,10240,0.34598401188850403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,6144,0.21753600239753723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,5120,0.2634879946708679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,5120,0.27961599826812744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,7168,0.2491839975118637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,16384,0.5412160158157349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,4096,0.20563200116157532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,4096,0.21158400177955627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,3584,0.17023999989032745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,3584,0.2072640061378479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,3072,0.17241600155830383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,3584,0.1351040005683899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,5120,0.18345600366592407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,4096,0.15190400183200836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,2560,0.23254400491714478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,2560,0.1363839954137802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,3072,0.14636799693107605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,2048,0.20723199844360352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,1536,0.14857600629329681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,1536,0.15622399747371674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,2048,0.20239999890327454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,2560,0.10419200360774994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,2048,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,1536,0.07295999675989151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,1024,0.10441599786281586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,1024,0.10505600273609161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,768,0.07631999999284744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,768,0.07651200145483017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,1024,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,768,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,512,0.05443200096487999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,512,0.05459199845790863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,256,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,512,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,256,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,128,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,128,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,3072,0.12095999717712402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,256,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,128,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,64,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,64,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,32,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,32,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,64,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,32,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,12288,0.2685439884662628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,16384,0.3524799942970276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,16384,0.20416000485420227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,12288,0.16291199624538422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,16384,0.1382399946451187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,10240,0.13177600502967834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,10240,0.21900799870491028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,8192,0.11116799712181091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,12288,0.10889600217342377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,10240,0.09225600212812424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,8192,0.17635199427604675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,7168,0.16387200355529785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,65536,0.86080002784729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,7168,0.0875839963555336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,7168,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,8192,0.07385600358247757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,6144,0.08278399705886841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,6144,0.07555200159549713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,65536,0.894976019859314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,5120,0.0655359998345375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,5120,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,6144,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,4096,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,4096,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,3584,0.08191999793052673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,5120,0.05023999884724617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,3584,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,4096,0.042399998754262924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,3584,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,3072,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,3072,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,2560,0.06172800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,2560,0.06095999851822853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,3072,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,2048,0.052319999784231186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,2560,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,2048,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,2048,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,1536,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,1536,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,1536,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,1024,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,1024,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,1024,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,768,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,768,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,512,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,512,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,768,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,256,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,256,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,512,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,65536,0.5202239751815796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,256,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,128,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,128,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,128,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,64,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,64,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,32,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,64,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,32,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,32,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,12288,0.14793600142002106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,16384,0.18300800025463104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,16384,0.16502399742603302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,16384,0.13913600146770477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,10240,0.1066880002617836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,10240,0.12742400169372559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,12288,0.10726399719715118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,12288,0.14563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,8192,0.10700800269842148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,65536,0.6211199760437012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,8192,0.1064319983124733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,10240,0.09180799871683121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,8192,0.07487999647855759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,7168,0.14403200149536133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,7168,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,6144,0.07766400277614594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,6144,0.0676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,7168,0.06732799857854843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,5120,0.0647680014371872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,5120,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,6144,0.059039998799562454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,5120,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,4096,0.0828159973025322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,4096,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,3584,0.04368000105023384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,3584,0.04800000041723251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,4096,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,3584,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,65536,0.6138240098953247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,3072,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,3072,0.05958399921655655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,3072,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,2560,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,2560,0.05526399984955788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,2560,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,2048,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,65536,0.5265920162200928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,2048,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,2048,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,1536,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,1536,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,1024,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,1024,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,1536,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,768,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,1024,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,768,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,512,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,512,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,768,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,256,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,512,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,256,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,128,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,128,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,64,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,64,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,32,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,32,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,12288,0.12310399860143661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,12288,0.1334719955921173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,16384,0.16329599916934967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,16384,0.16316799819469452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,16384,0.1252480000257492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,10240,0.10608000308275223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,12288,0.09785600006580353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,10240,0.08470399677753448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,8192,0.08851200342178345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,10240,0.1080000028014183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,65536,0.6221759915351868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,8192,0.08915200084447861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,7168,0.0809599980711937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,8192,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,7168,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,65536,0.5206720232963562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,7168,0.06032000109553337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,6144,0.07180800288915634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,6144,0.07177600264549255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,5120,0.06207999959588051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,5120,0.09372799843549728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,6144,0.054336000233888626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,4096,0.05571199953556061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,4096,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,5120,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,4096,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,3584,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,3584,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,3072,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,3072,0.05567999929189682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,3584,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,3072,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,2560,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,2560,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,2048,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,2048,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,65536,0.5213119983673096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,1536,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,2560,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,1536,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,2048,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,1536,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,1024,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,1024,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,1024,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,768,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,768,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,768,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,512,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,256,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,256,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,128,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,512,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,128,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,64,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,32,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,32,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,512,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,12288,0.10755199939012527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,16384,0.12857599556446075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,16384,0.12572799623012543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,16384,0.10700800269842148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,12288,0.09728000313043594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,12288,0.0777600035071373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,10240,0.11072000116109848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,8192,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,10240,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,10240,0.06572800129652023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,65536,0.3685440123081207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,7168,0.06825599819421768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,8192,0.08825600147247314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,8192,0.05375999957323074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,7168,0.09375999867916107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,65536,0.39737600088119507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,6144,0.07353600114583969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,7168,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,6144,0.08137600123882294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,5120,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,5120,0.0676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,6144,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,4096,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,5120,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,4096,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,3584,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,3584,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,3584,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,3072,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,4096,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,3072,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,65536,0.37676799297332764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,2560,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,3072,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,2560,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,2048,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,2560,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,2048,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,1536,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,2048,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,1536,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,1024,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,1024,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,1536,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,768,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,768,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,512,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,768,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,512,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,256,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,512,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,64,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,32,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,32,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,12288,0.13155199587345123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,12288,0.1308159977197647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,16384,0.08835200220346451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,16384,0.11913599818944931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,16384,0.0976639986038208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,12288,0.07667200267314911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,10240,0.1106560006737709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,10240,0.11535999923944473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,8192,0.0899839997291565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,10240,0.06652799993753433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,8192,0.09084799885749817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,65536,0.32572799921035767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,8192,0.05318399891257286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,7168,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,6144,0.07062400132417679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,7168,0.04912000149488449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,6144,0.07171200215816498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,6144,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,5120,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,5120,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,7168,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,4096,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,4096,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,65536,0.28140801191329956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,5120,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,4096,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,65536,0.36976000666618347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,3584,0.046911999583244324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,3072,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,3072,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,3584,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,3584,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,2560,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,3072,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,2560,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,2560,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,2048,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,2048,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,2048,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,1536,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,1024,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,1024,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,1536,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,1024,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,768,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,768,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,512,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,768,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,512,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,32,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,32,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,12288,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,12288,0.07574400305747986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,16384,0.10524799674749374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,16384,0.07999999821186066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,12288,0.062111999839544296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,16384,0.07865600287914276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,10240,0.08835200220346451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,10240,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,8192,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,10240,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,8192,0.08377599716186523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,8192,0.04364800080657005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,7168,0.07711999863386154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,65536,0.2487040013074875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,7168,0.08575999736785889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,6144,0.05462399870157242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,65536,0.40169599652290344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,7168,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,6144,0.06499200314283371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,5120,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,5120,0.06473600119352341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,4096,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,5120,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,6144,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,4096,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,3584,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,3584,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,3072,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,4096,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,3072,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,3072,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,3584,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,2560,0.036959998309612274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,2560,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,65536,0.29526400566101074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,2560,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,2048,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,2048,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,1536,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,2048,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,1536,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,1024,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,1024,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,768,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,1536,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,1024,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,768,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,512,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,768,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,256,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,512,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,256,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,12288,0.07680000364780426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,16384,0.07443200051784515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,16384,0.07363200187683105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,16384,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,32,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,12288,0.07091200351715088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,12288,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,10240,0.06787200272083282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,10240,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,8192,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,8192,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,65536,0.24937599897384644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,10240,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,65536,0.3742400109767914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,7168,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,6144,0.06172800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,8192,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,7168,0.06499200314283371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,7168,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,6144,0.06844799965620041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,5120,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,6144,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,5120,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,4096,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,5120,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,3584,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,4096,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,3584,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,4096,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,65536,0.2364799976348877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,3072,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,3072,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,3072,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,2560,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,2560,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,2048,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,2048,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,1536,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,1536,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,1024,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,768,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,1024,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,1024,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,768,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,512,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,512,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,256,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,256,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,32,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,12288,0.0679360032081604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,12288,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,16384,0.07068800181150436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,16384,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,16384,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,12288,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,10240,0.07916799932718277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,10240,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,8192,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,8192,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,10240,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,7168,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,7168,0.049247998744249344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,8192,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,65536,0.22233599424362183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,65536,0.20985600352287292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,7168,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,6144,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,65536,0.2043839991092682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,5120,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,6144,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,6144,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,5120,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,5120,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,4096,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,4096,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,3584,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,3072,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,3072,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,3584,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,2560,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,2560,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,2048,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,1536,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,2048,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,1536,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,1024,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,1024,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,768,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,768,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,768,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,512,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,32,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,12288,0.08716800063848495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,12288,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,16384,0.06377600133419037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,16384,0.06457599997520447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,12288,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,16384,0.05737600103020668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,10240,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,10240,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,10240,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,8192,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,65536,0.18294399976730347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,8192,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,65536,0.2024639993906021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,65536,0.15990400314331055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,7168,0.05027199909090996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,7168,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,7168,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,6144,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,6144,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,5120,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,5120,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,4096,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,4096,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,3584,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,3584,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,3072,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,2560,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,3072,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,2560,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,3584,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,2048,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,2048,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,1024,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,1536,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,1024,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,768,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,768,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,32,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,12288,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,16384,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,16384,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,12288,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,10240,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,10240,0.058687999844551086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,12288,0.06943999975919724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,8192,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,8192,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,65536,0.17203199863433838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,65536,0.17686399817466736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,8192,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,65536,0.2035199999809265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,7168,0.046592000871896744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,7168,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,6144,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,6144,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,7168,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,5120,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,5120,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,6144,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,4096,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,4096,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,3584,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,5120,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,3072,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,3584,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,3072,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,2560,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,2560,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,2048,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,2048,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,1536,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,1536,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,1024,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,1024,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,1024,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,768,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,768,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,12288,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,12288,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,16384,0.06300800293684006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,16384,0.053247999399900436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,16384,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,12288,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,10240,0.05244800075888634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,10240,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,8192,0.05027199909090996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,10240,0.043776001781225204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,8192,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,65536,0.15491199493408203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,7168,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,7168,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,8192,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,6144,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,7168,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,65536,0.13011200726032257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,5120,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,6144,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,5120,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,4096,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,4096,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,6144,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,65536,0.12777599692344666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,3584,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,3584,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,3072,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,3072,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,2560,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,2560,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,2048,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,2560,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,2048,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,1536,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,1536,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,1024,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,1536,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,1024,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,12288,0.04169600084424019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,12288,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,16384,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,16384,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,16384,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,12288,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,10240,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,10240,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,10240,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,8192,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,8192,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,7168,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,65536,0.11593600362539291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,65536,0.1348479986190796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,8192,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,7168,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,65536,0.11881600320339203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,7168,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,6144,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,6144,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,5120,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,6144,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,5120,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,5120,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,4096,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,3584,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,3072,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,3072,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,3584,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,2560,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,2048,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,2048,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,2048,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,1536,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,1024,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,512,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,12288,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,12288,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,16384,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,16384,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,16384,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,12288,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,10240,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,10240,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,8192,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,8192,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,10240,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,7168,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,65536,0.10419200360774994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,65536,0.1042879968881607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,7168,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,6144,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,6144,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,7168,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,5120,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,6144,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,65536,0.11820799857378006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,5120,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,4096,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,5120,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,8192,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,3584,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,3584,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,3072,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,2560,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,2048,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,2560,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,12288,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,12288,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,16384,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,16384,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,16384,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,12288,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,10240,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,10240,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,10240,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,8192,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,8192,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,7168,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,65536,0.08048000186681747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,8192,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,65536,0.0769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,65536,0.11776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,6144,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,5120,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,4096,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,5120,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,6144,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,5120,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,4096,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,3584,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,3072,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,2048,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,1024,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,12288,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,16384,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,16384,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,12288,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,16384,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,12288,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,10240,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,10240,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,8192,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,8192,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,8192,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,10240,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,65536,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,65536,0.07110399752855301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,7168,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,7168,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,6144,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,5120,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,6144,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,7168,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,5120,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,65536,0.11801599711179733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,3584,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,3072,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,2560,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,1536,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,12288,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,12288,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,16384,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,16384,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,16384,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,12288,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,10240,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,10240,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,10240,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,8192,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,8192,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,65536,0.06857600063085556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,8192,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,65536,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,7168,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,6144,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,6144,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,7168,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,5120,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,4096,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,65536,0.11660800129175186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,4096,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,3584,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,3072,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,2560,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,2048,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,1536,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,1024,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,12288,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,12288,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,16384,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,16384,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,12288,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,10240,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,10240,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,8192,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,8192,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,65536,0.04700800031423569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,8192,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,7168,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,65536,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,7168,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,6144,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,7168,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,6144,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,6144,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,5120,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,5120,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,5120,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,3584,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,4096,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,65536,0.11760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,3072,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,2560,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,1536,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,1024,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,12288,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,12288,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,16384,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,16384,0.057151999324560165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,12288,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,10240,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,10240,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,10240,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,8192,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,8192,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,16384,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,65536,0.050655998289585114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,65536,0.11689600348472595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,65536,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,8192,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,7168,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,7168,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,6144,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,5120,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,4096,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,4096,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,3584,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,3072,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,2560,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,2048,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,1536,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,12288,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,12288,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,16384,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,16384,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,16384,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,12288,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,10240,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,10240,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,10240,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,8192,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,65536,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,8192,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,65536,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,7168,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,8192,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,6144,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,7168,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,6144,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,7168,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,5120,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,65536,0.11692799627780914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,5120,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,4096,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,3584,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,3072,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,2560,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,2048,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,1536,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,768,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,64,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,12288,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,12288,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,16384,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,16384,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,12288,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,16384,0.05580800026655197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,10240,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,10240,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,8192,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,8192,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,65536,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,65536,0.05084799975156784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,7168,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,65536,0.20131200551986694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,8192,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,7168,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,6144,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,7168,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,6144,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,5120,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,5120,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,4096,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,4096,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,3584,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,3072,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,2560,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,2048,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,1536,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,1024,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,768,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,256,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,128,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,32,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,12288,0.302047997713089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,16384,0.3929919898509979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,12288,0.2953599989414215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,16384,0.38572800159454346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,12288,0.23104000091552734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,10240,0.2513279914855957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,8192,0.20230400562286377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,8192,0.20483200252056122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,16384,0.29235199093818665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,10240,0.25308799743652344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,10240,0.19696000218391418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,7168,0.1791040003299713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,7168,0.17980800569057465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,8192,0.15609599649906158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,6144,0.15625600516796112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,5120,0.132192000746727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,7168,0.14419199526309967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,5120,0.13363200426101685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,6144,0.15600000321865082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,4096,0.10924799740314484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,5120,0.10838399827480316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,3584,0.09708800166845322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,6144,0.1279360055923462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,3584,0.09705600142478943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,4096,0.10787200182676315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,4096,0.08940800279378891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,3072,0.08607999980449677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,3072,0.08515200018882751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,2560,0.07372800260782242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,2560,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,3584,0.08207999914884567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,2048,0.06255999952554703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,2560,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,3072,0.07369600236415863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,2048,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,1536,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,1536,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,1024,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,1024,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,1536,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,2048,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,768,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,768,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,1024,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,512,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,512,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,768,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,256,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,512,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,256,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,256,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,128,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,128,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,64,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,64,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,128,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,32,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,64,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,32,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,32,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,12288,0.0814720019698143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,16384,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,16384,0.10515200346708298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,12288,0.08044800162315369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,16384,0.110944002866745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,10240,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,10240,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,12288,0.0854400023818016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,8192,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,10240,0.07251200079917908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,8192,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,65536,0.38995200395584106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,7168,0.05196800082921982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,8192,0.06128000095486641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,7168,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,6144,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,65536,0.38288000226020813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,6144,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,7168,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,5120,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,5120,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,6144,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,4096,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,4096,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,5120,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,4096,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,3584,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,3584,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,3072,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,3584,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,3072,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,3072,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,2560,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,2560,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,2560,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,2048,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,2048,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,1536,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,2048,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,1536,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,65536,0.4521600008010864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,1024,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,1536,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,768,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,768,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,1024,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,768,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,512,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,512,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,256,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,64,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,32,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,32,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,256,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,16384,0.10425599664449692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,12288,0.08214399963617325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,16384,0.10380800068378448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,12288,0.08086399734020233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,16384,0.08588799834251404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,10240,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,12288,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,10240,0.06988800317049026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,10240,0.057312000542879105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,65536,0.3891200125217438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,8192,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,8192,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,8192,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,7168,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,65536,0.38624000549316406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,7168,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,6144,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,6144,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,6144,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,5120,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,5120,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,7168,0.04368000105023384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,5120,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,4096,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,4096,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,3584,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,3584,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,4096,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,65536,0.33852800726890564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,3072,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,3584,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,3072,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,3072,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,2560,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,2560,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,2048,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,2560,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,2048,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,1536,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,2048,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,1536,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,1536,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,1024,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,768,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,768,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,1024,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,512,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,512,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,256,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,32,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,16384,0.10425599664449692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,16384,0.10451199859380722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,12288,0.08153600245714188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,12288,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,16384,0.08214399963617325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,10240,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,12288,0.06451199948787689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,10240,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,8192,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,10240,0.05455999821424484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,65536,0.3887360095977783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,8192,0.05830400064587593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,7168,0.050655998289585114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,65536,0.3792960047721863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,7168,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,8192,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,7168,0.04233599826693535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,6144,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,6144,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,5120,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,5120,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,6144,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,4096,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,3584,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,5120,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,3584,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,4096,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,3584,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,65536,0.30697599053382874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,3072,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,3072,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,2560,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,2560,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,3072,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,2048,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,2560,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,1536,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,1536,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,4096,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,2048,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,2048,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,1024,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,1024,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,1536,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,768,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,1024,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,768,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,768,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,512,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,256,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,32,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,12288,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,12288,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,16384,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,16384,0.06249599903821945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,10240,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,16384,0.058079998940229416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,12288,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,10240,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,10240,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,8192,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,8192,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,7168,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,8192,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,65536,0.2024960070848465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,7168,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,65536,0.20051200687885284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,6144,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,6144,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,7168,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,6144,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,5120,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,5120,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,4096,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,4096,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,3584,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,3584,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,5120,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,3072,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,3072,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,65536,0.2521600127220154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,2560,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,3072,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,2560,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,2048,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,1536,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,2048,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,1024,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,1024,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,1024,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,768,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,32,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,12288,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,16384,0.059487998485565186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,12288,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,16384,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,12288,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,10240,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,10240,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,8192,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,8192,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,65536,0.18223999440670013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,16384,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,65536,0.17868800461292267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,7168,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,7168,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,6144,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,6144,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,7168,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,5120,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,6144,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,5120,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,4096,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,65536,0.21059200167655945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,4096,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,5120,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,3584,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,3584,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,3072,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,2560,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,3072,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,2560,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,1536,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,1024,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,1024,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,768,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,512,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,64,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,32,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,12288,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,12288,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,16384,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,16384,0.053119998425245285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,16384,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,12288,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,10240,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,10240,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,10240,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,8192,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,8192,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,7168,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,8192,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,7168,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,65536,0.1552319973707199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,65536,0.16073599457740784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,6144,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,6144,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,7168,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,5120,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,6144,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,5120,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,4096,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,4096,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,5120,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,4096,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,3584,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,3584,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,65536,0.20553599298000336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,3072,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,3072,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,2560,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,2560,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,3072,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,2048,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,2048,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,2048,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,1536,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,1536,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,768,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,768,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,512,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,12288,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,12288,0.07023999840021133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,16384,0.09027200192213058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,16384,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,10240,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,12288,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,16384,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,10240,0.06019200012087822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,8192,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,10240,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,8192,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,8192,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,65536,0.13574400544166565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,7168,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,7168,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,65536,0.1369280070066452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,6144,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,6144,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,7168,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,5120,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,5120,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,4096,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,6144,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,5120,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,4096,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,4096,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,3584,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,3584,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,65536,0.20572799444198608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,3072,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,3072,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,3072,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,2560,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,2048,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,2048,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,768,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,1024,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,1024,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,1536,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,512,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,512,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,12288,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,16384,0.04374400153756142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,16384,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,16384,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,10240,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,10240,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,12288,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,10240,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,12288,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,8192,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,8192,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,65536,0.11392000317573547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,65536,0.11648000031709671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,8192,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,7168,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,7168,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,6144,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,5120,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,6144,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,5120,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,4096,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,65536,0.12495999783277512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,4096,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,5120,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,3072,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,3584,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,3072,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,2560,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,2560,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,2048,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,1536,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,1536,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,32,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,12288,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,12288,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,16384,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,16384,0.04310400038957596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,16384,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,12288,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,10240,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,10240,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,10240,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,8192,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,8192,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,7168,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,65536,0.10300800204277039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,65536,0.10655999928712845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,8192,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,7168,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,6144,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,7168,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,5120,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,5120,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,6144,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,5120,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,3584,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,65536,0.12361600250005722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,3584,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,3072,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,2048,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,1536,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,1536,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,12288,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,16384,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,16384,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,12288,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,16384,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,12288,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,10240,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,10240,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,10240,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,8192,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,8192,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,7168,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,65536,0.09459199756383896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,8192,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,65536,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,7168,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,6144,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,7168,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,5120,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,6144,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,6144,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,5120,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,5120,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,4096,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,65536,0.1223360002040863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,4096,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,3584,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,2560,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,12288,0.035392001271247864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,12288,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,16384,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,16384,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,16384,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,12288,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,10240,0.03209599852561951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,10240,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,8192,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,10240,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,8192,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,65536,0.08521600067615509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,65536,0.08905600011348724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,8192,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,7168,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,7168,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,6144,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,6144,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,7168,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,5120,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,6144,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,4096,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,65536,0.11999999731779099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,5120,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,4096,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,3072,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,2560,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,5120,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,12288,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,12288,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,16384,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,16384,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,16384,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,10240,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,12288,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,10240,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,8192,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,10240,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,8192,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,7168,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,8192,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,65536,0.09529600292444229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,65536,0.08265600353479385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,7168,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,7168,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,6144,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,5120,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,5120,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,5120,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,65536,0.11897599697113037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,4096,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,3584,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,3072,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,2560,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,1536,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,12288,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,16384,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,16384,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,16384,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,12288,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,10240,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,10240,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,8192,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,10240,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,65536,0.06870400160551071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,8192,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,7168,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,7168,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,7168,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,6144,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,6144,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,6144,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,65536,0.06825599819421768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,5120,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,65536,0.11910399794578552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,4096,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,3584,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,3072,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,2048,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,2560,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,1536,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,2048,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,12288,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,16384,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,16384,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,12288,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,16384,0.057023998349905014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,12288,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,10240,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,10240,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,8192,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,8192,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,7168,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,65536,0.06297600269317627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,65536,0.06191999837756157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,8192,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,7168,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,6144,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,5120,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,5120,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,6144,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,5120,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,65536,0.11817599833011627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,4096,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,4096,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,3072,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,1024,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,12288,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,12288,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,16384,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,16384,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,16384,0.05801599845290184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,12288,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,10240,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,10240,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,10240,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,8192,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,8192,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,65536,0.0541439987719059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,8192,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,7168,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,65536,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,7168,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,6144,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,6144,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,7168,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,5120,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,4096,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,65536,0.1181119978427887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,4096,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,3584,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,3072,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,2560,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,2048,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,1536,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,1536,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,1024,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,12288,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,16384,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,16384,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,16384,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,12288,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,10240,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,10240,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,12288,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,65536,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,8192,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,8192,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,10240,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,65536,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,7168,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,8192,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,6144,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,7168,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,6144,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,6144,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,5120,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,65536,0.11737599968910217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,5120,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,4096,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,4096,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,3584,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,3584,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,3072,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,2048,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,1536,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,12288,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,12288,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,16384,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,16384,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,16384,0.05692800134420395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,12288,0.043616000562906265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,10240,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,10240,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,10240,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,8192,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,8192,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,65536,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,7168,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,65536,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,8192,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,6144,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,7168,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,6144,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,6144,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,65536,0.11846400052309036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,4096,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,5120,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,4096,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,1536,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,1536,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,16384,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,12288,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,12288,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,16384,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,16384,0.05830400064587593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,12288,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,10240,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,8192,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,10240,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,10240,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,65536,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,8192,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,7168,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,65536,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,8192,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,7168,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,6144,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,5120,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,65536,0.20227199792861938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,5120,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,4096,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,4096,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,5120,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,3584,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,3072,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,3072,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,2560,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,2048,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,64,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,12288,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,12288,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,16384,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,16384,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,16384,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,12288,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,10240,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,10240,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,10240,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,65536,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,8192,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,65536,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,8192,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,6144,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,7168,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,6144,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,5120,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,4096,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,5120,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,65536,0.11766400188207626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,4096,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,3072,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,2560,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,3072,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,7168,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,2048,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,2048,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,1536,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,12288,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,16384,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,16384,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,16384,0.05129599943757057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,12288,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,10240,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,10240,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,8192,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,10240,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,8192,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,65536,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,8192,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,65536,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,7168,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,6144,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,7168,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,6144,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,7168,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,6144,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,5120,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,4096,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,65536,0.2040639966726303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,4096,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,3584,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,3072,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,3072,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,2560,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,3072,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,2048,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,64,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,12288,0.29817599058151245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,16384,0.39664000272750854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,16384,0.4144960045814514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,12288,0.32707199454307556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,10240,0.27270400524139404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,12288,0.23465600609779358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,10240,0.19910399615764618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,8192,0.21580800414085388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,10240,0.24793599545955658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,16384,0.28883200883865356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,8192,0.20441600680351257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,7168,0.18003199994564056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,6144,0.17219200730323792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,6144,0.15481600165367126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,5120,0.13327999413013458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,5120,0.17744000256061554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,7168,0.1931840032339096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,8192,0.15516799688339233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,7168,0.14083200693130493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,6144,0.1265919953584671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,4096,0.12044800072908401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,4096,0.11007999628782272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,4096,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,5120,0.1056319996714592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,3584,0.10623999685049057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,3584,0.09718400239944458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,3072,0.08614400029182434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,3072,0.08975999802350998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,3584,0.07932800054550171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,3072,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,2048,0.06489600241184235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,2560,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,2048,0.06300800293684006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,2560,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,2560,0.07443200051784515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,1536,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,1536,0.05331199988722801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,2048,0.052639998495578766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,1024,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,1024,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,768,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,1536,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,1024,0.033504001796245575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,768,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,512,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,512,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,256,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,768,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,256,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,512,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,256,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,128,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,64,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,128,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,64,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,128,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,64,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,32,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,32,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,32,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,12288,0.08054400235414505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,12288,0.08748800307512283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,16384,0.10499200224876404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,16384,0.11241599917411804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,16384,0.10713600367307663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,10240,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,10240,0.07487999647855759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,12288,0.0862400010228157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,10240,0.07366400212049484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,8192,0.05967999994754791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,8192,0.08035200089216232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,7168,0.05196800082921982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,65536,0.4005120098590851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,8192,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,7168,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,6144,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,6144,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,7168,0.054336000233888626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,6144,0.04726399853825569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,5120,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,65536,0.4694080054759979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,5120,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,5120,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,4096,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,4096,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,3584,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,4096,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,3584,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,3072,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,3072,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,2560,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,2560,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,3072,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,2560,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,2048,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,2048,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,1536,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,1536,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,2048,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,65536,0.41923201084136963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,1024,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,1024,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,512,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,768,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,768,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,1024,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,768,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,512,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,3584,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,256,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,256,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,128,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,32,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,32,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,12288,0.08563199639320374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,12288,0.08099199831485748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,16384,0.10550399869680405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,16384,0.10259199887514114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,16384,0.08233600109815598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,10240,0.06880000233650208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,12288,0.0644799992442131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,10240,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,8192,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,10240,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,8192,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,65536,0.3887679874897003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,8192,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,7168,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,7168,0.050944000482559204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,6144,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,7168,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,6144,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,65536,0.3770560026168823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,5120,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,5120,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,6144,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,5120,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,4096,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,4096,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,3584,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,3584,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,4096,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,3072,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,3072,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,3584,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,3072,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,2560,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,2560,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,2048,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,2048,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,65536,0.3065600097179413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,2560,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,2048,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,1536,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,1536,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,1024,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,1024,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,1536,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,768,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,768,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,512,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,512,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,256,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,128,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,128,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,64,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,32,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,12288,0.07996799796819687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,16384,0.1050880029797554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,16384,0.10380800068378448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,16384,0.07980799674987793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,12288,0.0631679967045784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,12288,0.07932800054550171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,10240,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,10240,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,65536,0.39184001088142395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,8192,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,8192,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,10240,0.05548800155520439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,65536,0.38499200344085693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,7168,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,8192,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,7168,0.050175998359918594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,6144,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,6144,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,7168,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,6144,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,5120,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,5120,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,4096,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,5120,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,3584,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,4096,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,4096,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,3584,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,65536,0.29951998591423035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,3584,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,3072,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,3072,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,2560,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,2560,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,3072,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,2560,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,2048,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,2048,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,1536,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,1536,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,2048,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,1024,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,1536,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,1024,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,768,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,768,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,512,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,512,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,256,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,256,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,64,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,32,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,32,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,12288,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,16384,0.06268800050020218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,12288,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,16384,0.0663359984755516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,16384,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,10240,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,12288,0.04726399853825569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,10240,0.050175998359918594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,10240,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,8192,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,8192,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,65536,0.20054399967193604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,7168,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,8192,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,7168,0.044319998472929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,7168,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,6144,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,6144,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,65536,0.21376000344753265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,5120,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,6144,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,5120,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,4096,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,4096,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,3584,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,5120,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,3584,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,3072,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,65536,0.2128320038318634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,3072,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,2560,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,2560,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,2048,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,2048,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,1536,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,1536,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,1024,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,1024,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,768,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,768,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,512,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,64,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,16384,0.06310400366783142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,12288,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,12288,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,16384,0.059039998799562454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,16384,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,12288,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,10240,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,10240,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,8192,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,8192,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,10240,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,65536,0.17737600207328796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,7168,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,7168,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,8192,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,65536,0.1913280040025711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,7168,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,6144,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,6144,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,5120,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,5120,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,4096,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,4096,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,3584,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,3584,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,65536,0.2054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,3072,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,3072,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,2560,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,2560,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,2560,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,2048,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,2048,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,1536,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,1536,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,1024,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,1024,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,1024,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,12288,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,12288,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,16384,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,16384,0.053408000618219376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,16384,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,12288,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,10240,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,10240,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,10240,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,8192,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,8192,0.05011200159788132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,65536,0.15516799688339233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,65536,0.17401599884033203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,7168,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,6144,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,7168,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,6144,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,5120,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,5120,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,4096,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,5120,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,7168,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,4096,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,65536,0.20534400641918182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,3584,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,3584,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,3072,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,3072,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,2560,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,3072,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,2560,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,2048,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,1536,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,2048,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,1536,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,1024,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,1024,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,768,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,768,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,512,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,32,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,12288,0.06937599927186966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,12288,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,16384,0.08975999802350998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,16384,0.04867200180888176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,16384,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,12288,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,10240,0.058400001376867294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,10240,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,8192,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,8192,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,65536,0.13737599551677704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,7168,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,8192,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,65536,0.1372479945421219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,7168,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,6144,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,7168,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,6144,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,5120,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,5120,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,6144,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,5120,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,4096,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,4096,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,3584,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,3584,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,3072,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,65536,0.20243200659751892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,3072,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,2560,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,2560,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,2048,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,2048,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,1536,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,1536,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,1536,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,1024,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,1024,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,768,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,768,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,1024,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,768,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,512,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,32,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,12288,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,16384,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,16384,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,12288,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,16384,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,12288,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,10240,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,10240,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,8192,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,10240,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,8192,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,65536,0.11443199962377548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,7168,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,8192,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,65536,0.12195199728012085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,7168,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,6144,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,7168,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,6144,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,5120,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,6144,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,5120,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,5120,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,4096,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,4096,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,3584,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,4096,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,3584,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,65536,0.1223360002040863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,3072,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,3072,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,3072,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,2560,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,2560,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,2048,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,2048,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,2048,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,2560,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,1024,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,12288,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,12288,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,16384,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,16384,0.04310400038957596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,16384,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,12288,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,10240,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,10240,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,10240,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,8192,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,7168,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,8192,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,65536,0.10329599678516388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,8192,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,65536,0.11587200313806534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,7168,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,6144,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,6144,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,7168,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,5120,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,6144,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,5120,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,4096,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,5120,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,3584,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,4096,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,65536,0.11897599697113037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,3072,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,2560,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,2048,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,3584,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,12288,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,12288,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,16384,0.04374400153756142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,16384,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,16384,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,12288,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,10240,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,10240,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,10240,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,8192,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,8192,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,7168,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,65536,0.09564799815416336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,8192,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,65536,0.10070399940013885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,7168,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,6144,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,7168,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,6144,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,6144,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,5120,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,5120,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,4096,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,4096,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,4096,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,3584,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,65536,0.11935999989509583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,3584,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,3584,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,2560,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,3072,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,2560,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,2048,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,1536,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,12288,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,12288,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,16384,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,16384,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,12288,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,10240,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,10240,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,8192,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,8192,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,65536,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,16384,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,10240,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,8192,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,65536,0.08499199897050858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,7168,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,7168,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,6144,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,7168,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,6144,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,5120,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,6144,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,5120,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,4096,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,5120,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,65536,0.11849600076675415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,3584,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,3584,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,3072,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,3072,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,2560,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,2048,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,1536,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,12288,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,12288,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,16384,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,16384,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,16384,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,12288,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,10240,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,10240,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,8192,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,10240,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,8192,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,65536,0.08233600109815598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,7168,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,8192,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,65536,0.09724800288677216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,7168,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,6144,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,6144,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,7168,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,5120,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,6144,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,5120,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,65536,0.11766400188207626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,4096,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,3584,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,3072,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,3072,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,2560,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,1024,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,12288,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,12288,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,16384,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,16384,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,16384,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,12288,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,10240,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,8192,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,10240,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,8192,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,8192,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,7168,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,7168,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,65536,0.06896000355482101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,65536,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,7168,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,6144,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,6144,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,5120,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,5120,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,65536,0.11695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,4096,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,3072,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,3584,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,3072,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,2560,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,1536,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,12288,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,12288,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,16384,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,16384,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,16384,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,12288,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,10240,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,10240,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,8192,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,65536,0.06348799914121628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,65536,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,7168,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,7168,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,5120,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,6144,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,6144,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,65536,0.11680000275373459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,6144,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,5120,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,4096,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,8192,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,3072,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,2560,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,2048,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,12288,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,12288,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,16384,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,16384,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,16384,0.05686400085687637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,12288,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,10240,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,10240,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,10240,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,8192,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,65536,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,65536,0.07318399846553802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,65536,0.11670400202274323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,8192,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,7168,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,6144,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,6144,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,5120,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,6144,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,5120,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,5120,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,4096,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,4096,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,4096,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,3584,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,3584,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,3072,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,2560,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,2048,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,1536,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,12288,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,12288,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,16384,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,16384,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,16384,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,12288,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,10240,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,10240,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,10240,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,8192,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,65536,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,65536,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,8192,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,7168,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,7168,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,6144,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,6144,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,7168,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,5120,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,6144,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,65536,0.11638399958610535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,5120,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,4096,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,3584,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,3584,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,3072,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,2560,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,2048,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,1024,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,1536,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,12288,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,16384,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,12288,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,16384,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,16384,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,12288,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,10240,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,10240,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,10240,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,8192,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,8192,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,65536,0.04713600128889084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,65536,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,8192,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,7168,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,7168,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,6144,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,5120,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,65536,0.11644800007343292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,4096,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,4096,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,3584,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,3072,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,2048,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,5120,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,12288,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,12288,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,16384,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,16384,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,16384,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,12288,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,10240,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,10240,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,8192,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,65536,0.06028800085186958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,65536,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,8192,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,8192,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,7168,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,7168,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,7168,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,5120,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,65536,0.20838400721549988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,5120,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,3584,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,3584,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,4096,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,3072,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,2560,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,2048,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,1536,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,12288,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,12288,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,16384,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,16384,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,16384,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,12288,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,10240,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,10240,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,10240,0.038943998515605927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,8192,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,8192,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,7168,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,65536,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,7168,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,6144,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,6144,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,6144,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,65536,0.11574400216341019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,5120,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,5120,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,65536,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,4096,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,3584,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,3072,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,2560,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,2048,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,1536,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,768,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,512,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,32,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,12288,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,12288,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,16384,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,16384,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,16384,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,12288,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,10240,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,10240,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,8192,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,65536,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,8192,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,65536,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,8192,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,6144,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,7168,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,6144,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,7168,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,65536,0.19910399615764618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,5120,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,4096,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,3584,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,3072,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,2560,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,1536,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,1536,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,1024,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,512,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,768,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,32,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,12288,0.323743999004364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,10240,0.2709760069847107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,12288,0.3584960103034973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,10240,0.2934719920158386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,12288,0.23417599499225616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,16384,0.4875200092792511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,8192,0.22575999796390533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,16384,0.42160001397132874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,8192,0.15478399395942688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,7168,0.2210880070924759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,7168,0.19539199769496918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,8192,0.2489600032567978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,10240,0.2001280039548874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,16384,0.2858879864215851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,7168,0.1406400054693222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,6144,0.16204799711704254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,6144,0.18675200641155243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,5120,0.13840000331401825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,5120,0.16204799711704254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,4096,0.1175680011510849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,6144,0.1265600025653839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,5120,0.10576000064611435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,3584,0.09875199943780899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,4096,0.0862400010228157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,4096,0.13203200697898865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,3584,0.12195199728012085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,3072,0.1019200012087822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,3584,0.0783040001988411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,3072,0.08723200112581253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,2560,0.08617600053548813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,2048,0.07097599655389786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,2560,0.07475200295448303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,2048,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,3072,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,2560,0.060127999633550644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,1536,0.05455999821424484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,1536,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,2048,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,1024,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,1024,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,768,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,1536,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,1024,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,768,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,512,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,512,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,768,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,256,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,256,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,512,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,256,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,128,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,128,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,128,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,64,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,64,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,32,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,64,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,32,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,32,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,12288,0.09510400146245956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,16384,0.1125440001487732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,16384,0.1281599998474121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,16384,0.10719999670982361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,10240,0.0817599967122078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,12288,0.08460800349712372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,10240,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,12288,0.08409599959850311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,65536,0.4246079921722412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,8192,0.06092799827456474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,8192,0.06672000139951706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,10240,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,8192,0.06006399914622307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,7168,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,7168,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,6144,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,6144,0.05145600065588951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,7168,0.0541439987719059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,65536,0.5065919756889343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,5120,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,6144,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,5120,0.060416001826524734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,4096,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,4096,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,5120,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,3584,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,4096,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,3584,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,3072,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,3584,0.03209599852561951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,3072,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,3072,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,2560,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,2560,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,65536,0.41948801279067993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,2048,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,2560,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,2048,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,2048,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,1536,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,1536,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,1024,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,1024,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,1536,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,768,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,768,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,1024,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,768,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,512,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,512,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,256,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,128,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,32,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,32,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,32,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,12288,0.08054400235414505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,12288,0.08975999802350998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,16384,0.10460799932479858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,16384,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,16384,0.08124800026416779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,10240,0.07075200229883194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,10240,0.08323200047016144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,12288,0.06672000139951706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,10240,0.05593600124120712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,8192,0.05734400078654289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,8192,0.06374400109052658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,65536,0.44972801208496094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,7168,0.05161599814891815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,8192,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,7168,0.05132799968123436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,6144,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,7168,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,65536,0.4472000002861023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,65536,0.30300799012184143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,5120,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,6144,0.0644799992442131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,6144,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,5120,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,4096,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,4096,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,5120,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,4096,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,3584,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,3584,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,3072,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,3072,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,3584,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,3072,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,2560,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,2560,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,2048,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,2560,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,2048,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,1536,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,1536,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,2048,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,1024,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,1536,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,1024,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,1024,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,768,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,768,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,512,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,512,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,128,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,128,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,64,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,32,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,32,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,12288,0.07919999957084656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,16384,0.10502400249242783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,16384,0.1189119964838028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,12288,0.08105599880218506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,16384,0.07939200103282928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,10240,0.06719999760389328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,12288,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,10240,0.06918399780988693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,8192,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,10240,0.054016001522541046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,8192,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,7168,0.05129599943757057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,65536,0.4129599928855896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,8192,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,65536,0.4010240137577057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,6144,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,7168,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,6144,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,65536,0.29737600684165955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,7168,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,5120,0.043455999344587326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,5120,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,6144,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,4096,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,5120,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,4096,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,3584,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,3584,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,4096,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,3072,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,3584,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,2560,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,3072,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,3072,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,2560,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,2560,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,2048,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,2048,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,1536,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,2048,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,1536,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,1536,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,1024,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,1024,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,768,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,1024,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,768,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,512,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,512,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,256,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,32,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,32,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,12288,0.0660799965262413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,12288,0.0605119988322258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,16384,0.06617599725723267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,16384,0.07331199944019318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,16384,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,12288,0.046271998435258865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,10240,0.05536000058054924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,10240,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,10240,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,8192,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,65536,0.21068799495697021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,8192,0.04662400111556053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,7168,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,65536,0.22623999416828156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,7168,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,7168,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,6144,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,6144,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,5120,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,4096,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,4096,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,5120,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,3584,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,3584,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,4096,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,65536,0.21299199759960175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,3072,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,3072,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,2560,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,2560,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,2048,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,5120,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,2048,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,1536,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,1536,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,1024,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,1024,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,1536,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,512,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,32,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,12288,0.05212799832224846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,12288,0.05276799947023392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,16384,0.0594559982419014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,16384,0.06483200192451477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,16384,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,10240,0.05004800111055374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,12288,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,10240,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,10240,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,8192,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,8192,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,7168,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,65536,0.1812479943037033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,65536,0.18083199858665466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,7168,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,6144,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,6144,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,7168,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,6144,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,5120,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,5120,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,5120,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,4096,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,4096,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,3584,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,3584,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,65536,0.20387199521064758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,3072,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,2560,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,2560,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,2560,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,2048,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,2048,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,1536,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,1536,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,1024,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,1024,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,768,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,768,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,128,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,12288,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,16384,0.054016001522541046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,16384,0.061792001128196716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,12288,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,16384,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,12288,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,10240,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,10240,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,65536,0.17766399681568146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,10240,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,8192,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,8192,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,65536,0.188960000872612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,7168,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,8192,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,7168,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,7168,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,6144,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,6144,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,6144,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,5120,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,5120,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,4096,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,4096,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,3584,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,65536,0.20579199492931366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,3584,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,4096,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,3584,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,3072,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,3072,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,2560,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,2560,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,2048,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,2560,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,2048,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,1536,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,1536,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,1024,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,1024,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,768,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,1024,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,512,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,12288,0.06911999732255936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,12288,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,16384,0.04931199923157692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,16384,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,10240,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,12288,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,10240,0.036959998309612274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,10240,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,8192,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,8192,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,65536,0.14044800400733948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,65536,0.13731199502944946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,7168,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,8192,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,7168,0.04368000105023384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,6144,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,6144,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,7168,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,5120,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,6144,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,5120,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,5120,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,4096,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,4096,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,4096,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,3584,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,65536,0.20425599813461304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,3584,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,3072,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,3072,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,2560,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,2048,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,2048,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,2048,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,1536,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,1536,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,1536,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,1024,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,1024,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,768,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,768,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,768,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,2560,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,128,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,64,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,12288,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,12288,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,16384,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,16384,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,16384,0.03596799820661545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,12288,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,10240,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,8192,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,10240,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,8192,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,7168,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,8192,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,65536,0.12931199371814728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,65536,0.12166400253772736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,7168,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,65536,0.11539199948310852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,6144,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,6144,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,7168,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,6144,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,5120,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,5120,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,4096,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,5120,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,4096,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,4096,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,3584,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,3072,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,3584,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,3072,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,2560,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,2560,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,2048,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,2048,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,1536,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,1536,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,2048,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,1536,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,12288,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,16384,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,16384,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,16384,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,12288,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,10240,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,10240,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,10240,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,8192,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,12288,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,65536,0.10502400249242783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,8192,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,65536,0.11635199934244156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,65536,0.12115199863910675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,8192,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,7168,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,7168,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,6144,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,6144,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,7168,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,6144,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,5120,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,5120,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,4096,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,4096,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,3584,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,3072,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,3584,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,3072,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,2560,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,2560,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,2048,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,2048,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,1536,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,12288,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,12288,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,16384,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,16384,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,16384,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,12288,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,10240,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,10240,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,10240,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,8192,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,8192,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,8192,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,65536,0.09628800302743912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,7168,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,7168,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,65536,0.11919999867677689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,6144,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,7168,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,65536,0.095551997423172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,6144,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,5120,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,5120,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,6144,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,4096,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,5120,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,4096,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,4096,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,3584,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,3584,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,3072,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,2560,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,2560,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,2048,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,1536,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,12288,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,16384,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,16384,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,12288,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,16384,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,12288,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,10240,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,10240,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,10240,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,8192,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,8192,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,65536,0.084927998483181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,7168,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,65536,0.0857279971241951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,8192,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,7168,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,6144,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,6144,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,5120,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,6144,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,5120,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,4096,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,5120,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,4096,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,65536,0.11875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,3584,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,3072,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,3072,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,2560,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,2560,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,2048,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,1024,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,1024,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,768,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,512,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,12288,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,12288,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,16384,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,16384,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,16384,0.0586559996008873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,12288,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,10240,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,10240,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,10240,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,8192,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,8192,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,8192,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,7168,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,65536,0.09593600034713745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,65536,0.08131200075149536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,65536,0.11772800236940384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,7168,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,6144,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,6144,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,5120,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,6144,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,5120,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,4096,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,4096,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,3584,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,3072,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,3584,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,3072,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,2560,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,7168,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,2560,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,768,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,12288,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,12288,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,16384,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,16384,0.03545600175857544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,16384,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,12288,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,10240,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,8192,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,10240,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,10240,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,8192,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,7168,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,8192,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,65536,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,7168,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,65536,0.11692799627780914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,65536,0.06825599819421768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,6144,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,6144,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,5120,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,6144,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,5120,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,4096,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,3072,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,2560,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,2048,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,2048,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,1536,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,1024,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,1024,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,12288,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,16384,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,16384,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,16384,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,12288,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,12288,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,10240,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,10240,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,8192,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,10240,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,65536,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,65536,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,65536,0.11641599982976913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,8192,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,7168,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,8192,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,7168,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,6144,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,6144,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,5120,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,7168,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,5120,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,6144,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,3072,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,2560,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,2048,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,2048,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,1536,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,1024,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,12288,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,12288,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,16384,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,16384,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,16384,0.05660799890756607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,12288,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,10240,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,10240,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,8192,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,8192,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,65536,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,65536,0.05663999915122986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,7168,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,8192,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,6144,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,7168,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,7168,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,6144,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,5120,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,4096,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,5120,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,65536,0.1167680025100708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,4096,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,3072,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,2560,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,2048,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,1536,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,3584,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,64,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,12288,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,12288,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,16384,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,16384,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,12288,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,16384,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,10240,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,10240,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,10240,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,8192,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,8192,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,65536,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,65536,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,8192,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,65536,0.11654400080442429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,7168,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,7168,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,6144,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,7168,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,6144,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,5120,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,6144,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,5120,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,4096,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,3584,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,4096,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,3584,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,3072,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,2560,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,2048,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,12288,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,16384,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,16384,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,16384,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,12288,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,10240,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,10240,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,12288,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,65536,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,8192,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,8192,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,65536,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,10240,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,8192,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,7168,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,7168,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,6144,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,6144,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,7168,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,6144,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,65536,0.11574400216341019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,5120,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,5120,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,4096,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,4096,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,3584,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,3072,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,2048,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,1536,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,1024,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,12288,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,12288,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,16384,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,16384,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,16384,0.05724800005555153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,12288,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,10240,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,10240,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,10240,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,8192,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,65536,0.04374400153756142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,7168,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,8192,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,65536,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,7168,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,6144,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,7168,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,6144,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,6144,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,65536,0.1159679964184761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,5120,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,4096,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,3584,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,3072,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,2560,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,2048,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,1536,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,64,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,12288,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,16384,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,12288,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,16384,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,16384,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,12288,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,10240,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,10240,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,10240,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,8192,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,65536,0.0432640016078949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,65536,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,7168,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,8192,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,7168,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,6144,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,6144,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,7168,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,6144,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,5120,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,65536,0.11548800021409988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,5120,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,4096,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,4096,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,3584,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,3072,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,2560,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,2048,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,1536,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,1536,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,1024,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,768,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,12288,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,12288,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,16384,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,16384,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,16384,0.05459199845790863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,12288,0.043616000562906265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,10240,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,10240,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,10240,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,8192,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,65536,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,8192,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,65536,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,7168,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,7168,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,6144,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,6144,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,65536,0.19990399479866028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,5120,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,8192,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,4096,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,3584,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,3072,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,2048,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,2560,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,1536,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,1024,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,128,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,12288,0.24563199281692505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,10240,0.2043839991092682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,16384,0.3202880024909973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,10240,0.20131200551986694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,12288,0.2351360023021698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,16384,0.3107520043849945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,8192,0.16649599373340607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,12288,0.20953600108623505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,8192,0.16179199516773224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,7168,0.14364799857139587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,7168,0.15011200308799744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,10240,0.17664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,16384,0.2791999876499176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,8192,0.14524799585342407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,6144,0.12751999497413635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,6144,0.12918399274349213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,7168,0.12675200402736664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,5120,0.10742399841547012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,4096,0.09120000153779984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,4096,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,5120,0.10976000130176544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,6144,0.11289600282907486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,5120,0.09705600142478943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,4096,0.08035200089216232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,3584,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,3584,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,3072,0.07168000191450119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,3072,0.0697920024394989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,3584,0.07100799679756165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,3072,0.063680000603199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,2560,0.061664000153541565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,2560,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,2048,0.05196800082921982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,2048,0.05209600180387497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,2560,0.05696000158786774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,2048,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,1536,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,1024,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,1536,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,1024,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,1024,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,768,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,1536,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,768,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,768,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,512,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,512,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,256,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,256,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,512,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,256,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,128,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,128,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,64,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,64,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,32,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,32,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,128,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,12288,0.0671359971165657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,16384,0.08937600255012512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,16384,0.08326400071382523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,12288,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,16384,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,10240,0.05929600074887276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,12288,0.05814399942755699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,10240,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,8192,0.047200001776218414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,10240,0.051392000168561935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,8192,0.04867200180888176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,65536,0.3283199965953827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,8192,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,7168,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,7168,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,6144,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,6144,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,7168,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,65536,0.3110080063343048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,5120,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,6144,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,5120,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,4096,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,5120,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,4096,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,3584,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,3584,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,4096,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,3584,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,3072,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,65536,0.28255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,3072,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,3072,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,2560,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,2560,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,2048,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,2560,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,2048,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,1536,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,2048,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,1536,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,1024,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,1536,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,768,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,1024,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,32,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,12288,0.06998399645090103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,12288,0.06617599725723267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,16384,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,16384,0.0828159973025322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,16384,0.06204799935221672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,12288,0.04787199944257736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,10240,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,10240,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,10240,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,8192,0.04899200052022934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,8192,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,7168,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,7168,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,8192,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,6144,0.037408001720905304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,65536,0.3158079981803894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,7168,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,6144,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,65536,0.32873600721359253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,6144,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,5120,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,5120,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,5120,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,4096,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,4096,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,3584,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,4096,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,65536,0.21862399578094482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,3584,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,3072,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,3072,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,2560,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,2560,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,3072,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,2560,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,2048,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,2048,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,1536,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,2048,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,1024,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,1536,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,3584,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,1024,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,768,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,512,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,512,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,64,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,12288,0.06982400268316269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,12288,0.06611199676990509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,16384,0.09193599969148636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,16384,0.08316799998283386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,16384,0.05897599831223488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,10240,0.05510399863123894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,10240,0.059007998555898666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,12288,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,10240,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,8192,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,8192,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,7168,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,8192,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,65536,0.331712007522583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,7168,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,7168,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,6144,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,6144,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,65536,0.30716800689697266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,5120,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,5120,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,6144,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,4096,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,4096,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,3584,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,5120,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,4096,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,3584,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,3584,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,3072,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,3072,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,65536,0.20787200331687927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,2560,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,2560,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,3072,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,2560,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,2048,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,2048,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,1536,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,1536,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,1536,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,1024,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,768,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,768,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,512,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,64,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,12288,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,12288,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,16384,0.054336000233888626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,16384,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,16384,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,12288,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,10240,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,8192,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,10240,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,8192,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,65536,0.17046399414539337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,7168,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,7168,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,8192,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,65536,0.16169600188732147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,6144,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,7168,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,6144,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,6144,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,5120,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,5120,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,4096,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,5120,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,3584,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,4096,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,3584,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,3072,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,3072,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,1536,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,1536,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,65536,0.15091200172901154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,32,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,12288,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,12288,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,16384,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,16384,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,16384,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,12288,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,10240,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,10240,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,10240,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,8192,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,8192,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,65536,0.1509760022163391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,7168,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,8192,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,7168,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,65536,0.14448000490665436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,7168,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,6144,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,5120,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,6144,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,5120,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,6144,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,4096,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,4096,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,5120,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,4096,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,65536,0.14259199798107147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,3584,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,1536,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,1536,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,12288,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,12288,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,16384,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,16384,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,16384,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,10240,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,10240,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,12288,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,8192,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,10240,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,8192,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,65536,0.13247999548912048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,7168,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,7168,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,8192,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,65536,0.1305599957704544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,6144,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,6144,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,7168,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,5120,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,6144,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,5120,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,5120,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,4096,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,65536,0.13475200533866882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,4096,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,3584,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,3072,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,3072,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,2560,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,2560,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,1536,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,1024,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,768,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,12288,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,12288,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,16384,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,16384,0.04169600084424019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,16384,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,10240,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,10240,0.05011200159788132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,12288,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,8192,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,8192,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,65536,0.11366400122642517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,7168,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,8192,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,65536,0.26950401067733765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,7168,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,6144,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,6144,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,7168,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,6144,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,5120,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,4096,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,5120,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,10240,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,65536,0.12915199995040894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,4096,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,4096,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,3584,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,2560,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,2560,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,2048,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,2048,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,1536,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,1024,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,768,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,768,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,512,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,256,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,12288,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,12288,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,16384,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,16384,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,16384,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,10240,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,12288,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,10240,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,10240,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,8192,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,8192,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,8192,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,65536,0.0960640013217926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,65536,0.09279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,7168,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,7168,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,6144,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,6144,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,5120,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,7168,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,6144,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,5120,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,4096,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,4096,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,3584,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,65536,0.12326399981975555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,3584,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,3072,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,3072,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,2560,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,2048,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,32,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,12288,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,16384,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,16384,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,16384,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,12288,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,12288,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,10240,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,10240,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,10240,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,8192,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,65536,0.08636800199747086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,7168,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,65536,0.08521600067615509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,8192,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,7168,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,6144,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,5120,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,7168,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,6144,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,65536,0.12172800302505493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,5120,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,3584,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,4096,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,3584,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,3072,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,2560,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,2048,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,512,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,12288,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,12288,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,16384,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,16384,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,12288,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,10240,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,10240,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,10240,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,8192,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,8192,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,65536,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,7168,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,65536,0.0783040001988411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,8192,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,7168,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,6144,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,6144,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,7168,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,6144,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,5120,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,65536,0.12220799922943115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,4096,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,3584,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,3072,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,3072,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,2560,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,5120,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,2048,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,1536,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,12288,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,16384,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,16384,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,16384,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,10240,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,12288,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,10240,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,8192,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,10240,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,8192,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,8192,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,65536,0.07049600034952164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,7168,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,7168,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,65536,0.06963200122117996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,6144,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,7168,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,6144,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,6144,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,5120,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,5120,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,4096,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,4096,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,65536,0.12054400146007538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,3072,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,3584,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,3072,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,2560,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,2560,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,2048,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,1536,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,1024,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,768,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,12288,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,12288,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,16384,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,16384,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,16384,0.06080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,12288,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,10240,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,10240,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,10240,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,8192,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,8192,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,7168,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,8192,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,65536,0.06207999959588051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,7168,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,6144,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,5120,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,6144,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,5120,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,65536,0.12083200365304947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,5120,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,4096,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,65536,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,3072,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,3584,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,2560,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,2560,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,2048,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,1024,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,12288,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,12288,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,16384,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,16384,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,16384,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,12288,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,10240,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,10240,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,8192,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,8192,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,65536,0.06441599875688553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,65536,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,8192,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,7168,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,7168,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,6144,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,5120,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,5120,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,5120,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,4096,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,65536,0.11875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,4096,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,3584,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,3584,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,3072,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,2560,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,1536,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,12288,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,12288,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,16384,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,16384,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,16384,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,12288,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,10240,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,10240,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,10240,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,8192,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,8192,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,65536,0.05164799839258194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,7168,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,65536,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,8192,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,7168,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,7168,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,6144,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,6144,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,5120,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,6144,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,5120,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,5120,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,4096,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,65536,0.1194240003824234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,3584,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,3584,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,2048,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,768,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,12288,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,16384,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,16384,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,16384,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,12288,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,10240,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,10240,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,65536,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,65536,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,10240,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,8192,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,12288,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,8192,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,7168,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,6144,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,7168,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,5120,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,6144,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,6144,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,65536,0.11852800101041794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,5120,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,5120,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,4096,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,4096,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,3584,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,2560,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,2560,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,2048,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,2048,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,1536,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,12288,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,12288,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,16384,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,16384,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,16384,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,10240,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,12288,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,10240,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,10240,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,8192,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,8192,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,7168,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,8192,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,65536,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,65536,0.04819199815392494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,6144,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,7168,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,6144,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,5120,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,6144,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,65536,0.11846400052309036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,4096,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,3072,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,2560,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,3072,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,2560,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,2048,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,64,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,32,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,32,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,12288,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,16384,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,16384,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,12288,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,16384,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,12288,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,10240,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,8192,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,10240,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,10240,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,8192,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,65536,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,65536,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,8192,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,7168,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,7168,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,5120,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,7168,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,5120,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,6144,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,65536,0.1181119978427887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,5120,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,3584,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,4096,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,3584,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,2560,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,3072,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,2560,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,2048,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,1536,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,1536,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,32,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,12288,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,12288,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,16384,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,16384,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,16384,0.05907199904322624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,12288,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,10240,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,10240,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,10240,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,8192,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,65536,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,8192,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,65536,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,7168,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,7168,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,6144,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,6144,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,65536,0.11807999759912491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,4096,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,4096,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,5120,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,3584,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,3584,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,3584,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,3072,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,3072,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,3072,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,2560,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,2560,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,2048,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,2048,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,5120,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,2048,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,1536,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,1024,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,1536,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,32,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,12288,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,12288,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,16384,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,16384,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,16384,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,12288,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,10240,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,10240,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,10240,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,8192,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,8192,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,8192,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,7168,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,65536,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,65536,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,7168,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,6144,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,7168,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,5120,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,6144,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,6144,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,5120,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,5120,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,65536,0.11795199662446976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,4096,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,4096,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,3584,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,3072,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,3584,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,3072,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,2560,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,3072,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,2048,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,2048,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,2560,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,2560,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,2048,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,64,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,12288,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,16384,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,16384,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,12288,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,16384,0.05225599929690361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,10240,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,12288,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,8192,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,10240,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,10240,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,65536,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,65536,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,8192,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,7168,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,8192,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,7168,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,6144,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,7168,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,65536,0.20243200659751892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,5120,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,5120,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,6144,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,4096,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,3584,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,3072,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,3584,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,3072,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,2560,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,2560,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,2048,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,1536,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,64,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,64,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,32,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,12288,0.24435199797153473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,10240,0.20479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,16384,0.32604798674583435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,12288,0.23737600445747375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,10240,0.2011519968509674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,12288,0.2059839963912964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,16384,0.31007999181747437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,8192,0.1621759980916977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,10240,0.17308799922466278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,7168,0.14892800152301788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,8192,0.14006400108337402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,8192,0.16838400065898895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,7168,0.14505599439144135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,16384,0.263808012008667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,5120,0.11043199896812439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,6144,0.13132800161838531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,7168,0.12505599856376648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,6144,0.125791996717453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,4096,0.09126400202512741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,5120,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,5120,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,6144,0.11007999628782272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,4096,0.09132800251245499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,3584,0.081216000020504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,3584,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,4096,0.07968000322580338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,3072,0.07180800288915634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,3072,0.0716480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,3584,0.07081600278615952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,2560,0.061824001371860504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,2560,0.06201599910855293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,2048,0.05283199995756149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,2048,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,2560,0.05539200082421303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,1536,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,1536,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,2048,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,1024,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,1536,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,1024,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,768,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,1024,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,768,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,512,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,512,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,768,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,256,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,3072,0.06310400366783142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,512,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,256,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,256,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,128,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,128,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,64,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,128,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,64,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,32,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,32,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,12288,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,12288,0.06518399715423584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,16384,0.0904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,16384,0.084927998483181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,16384,0.07552000135183334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,10240,0.05929600074887276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,12288,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,10240,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,10240,0.051231998950242996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,8192,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,8192,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,65536,0.3298560082912445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,7168,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,8192,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,7168,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,6144,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,6144,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,7168,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,5120,0.033504001796245575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,5120,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,6144,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,65536,0.31113600730895996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,5120,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,4096,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,4096,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,3584,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,4096,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,3584,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,3584,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,3072,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,2560,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,2560,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,3072,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,2048,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,65536,0.2781440019607544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,2560,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,2048,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,2048,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,1536,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,1536,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,1024,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,1024,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,1536,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,768,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,512,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,512,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,512,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,128,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,64,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,32,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,16384,0.08483199775218964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,16384,0.09017600119113922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,12288,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,16384,0.05846399813890457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,10240,0.059167999774217606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,12288,0.04918399825692177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,10240,0.05571199953556061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,12288,0.06854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,65536,0.331743985414505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,10240,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,8192,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,8192,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,8192,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,7168,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,65536,0.3138880133628845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,7168,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,6144,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,6144,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,7168,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,5120,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,6144,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,5120,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,4096,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,4096,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,5120,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,3584,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,4096,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,3072,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,65536,0.2144639939069748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,3072,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,3584,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,3072,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,2560,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,2560,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,2048,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,2048,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,1536,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,2048,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,1536,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,1536,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,1024,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,1024,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,1024,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,768,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,768,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,512,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,32,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,12288,0.0689919963479042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,12288,0.06499200314283371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,16384,0.08931200206279755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,16384,0.08473599702119827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,12288,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,16384,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,10240,0.0551999993622303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,10240,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,8192,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,8192,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,10240,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,65536,0.3285439908504486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,7168,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,7168,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,6144,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,6144,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,65536,0.31679999828338623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,7168,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,65536,0.20419199764728546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,5120,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,5120,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,6144,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,4096,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,5120,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,4096,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,3584,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,3584,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,3072,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,3584,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,3072,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,3072,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,2560,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,2560,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,2048,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,2560,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,2048,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,2048,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,1536,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,1536,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,1536,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,1024,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,768,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,1024,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,1024,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,512,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,256,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,512,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,12288,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,16384,0.05459199845790863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,16384,0.05289600044488907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,12288,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,16384,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,10240,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,12288,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,10240,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,8192,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,8192,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,65536,0.1719360053539276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,7168,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,8192,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,65536,0.1483840048313141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,7168,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,65536,0.16259199380874634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,6144,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,6144,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,7168,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,5120,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,6144,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,5120,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,4096,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,4096,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,5120,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,3584,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,3584,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,3072,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,3072,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,3072,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,2560,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,2048,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,2560,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,2048,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,2048,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,1024,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,12288,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,12288,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,16384,0.04950400069355965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,16384,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,16384,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,10240,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,10240,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,12288,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,10240,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,8192,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,8192,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,8192,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,65536,0.15343999862670898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,65536,0.1446399986743927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,7168,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,6144,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,6144,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,7168,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,5120,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,6144,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,5120,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,5120,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,4096,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,3584,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,3584,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,4096,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,65536,0.14035199582576752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,3072,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,7168,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,3072,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,2560,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,2048,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,1536,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,1536,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,1536,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,12288,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,12288,0.037408001720905304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,16384,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,16384,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,16384,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,12288,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,10240,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,10240,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,8192,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,10240,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,65536,0.1327359974384308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,8192,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,7168,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,65536,0.12905600666999817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,6144,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,7168,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,6144,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,5120,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,6144,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,5120,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,5120,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,4096,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,3584,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,4096,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,65536,0.13206399977207184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,3584,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,3072,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,2560,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,2560,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,3072,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,2048,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,1536,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,1024,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,1024,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,768,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,768,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,12288,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,16384,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,16384,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,16384,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,12288,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,10240,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,12288,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,10240,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,8192,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,10240,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,65536,0.11488000303506851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,8192,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,7168,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,65536,0.2693760097026825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,7168,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,8192,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,65536,0.12432000041007996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,7168,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,5120,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,5120,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,6144,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,6144,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,4096,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,4096,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,5120,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,4096,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,3584,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,3584,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,3072,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,3584,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,3072,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,2560,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,2560,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,2560,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,2048,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,1536,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,2048,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,1024,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,768,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,512,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,12288,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,16384,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,16384,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,16384,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,10240,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,10240,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,10240,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,8192,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,8192,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,65536,0.09756799787282944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,7168,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,8192,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,65536,0.09276799857616425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,7168,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,6144,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,6144,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,7168,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,5120,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,6144,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,5120,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,4096,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,5120,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,65536,0.12019199877977371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,4096,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,3072,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,3072,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,2048,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,3584,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,12288,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,12288,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,16384,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,16384,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,12288,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,16384,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,10240,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,10240,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,8192,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,8192,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,10240,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,7168,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,8192,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,65536,0.08505599945783615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,65536,0.08854400366544724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,7168,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,6144,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,7168,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,6144,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,5120,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,5120,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,5120,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,3584,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,65536,0.11919999867677689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,4096,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,3072,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,3072,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,2048,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,1536,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,12288,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,16384,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,16384,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,16384,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,12288,0.049056001007556915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,10240,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,10240,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,8192,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,12288,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,65536,0.07862400263547897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,10240,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,65536,0.07631999999284744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,7168,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,8192,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,7168,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,6144,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,7168,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,6144,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,6144,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,5120,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,4096,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,4096,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,65536,0.11727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,5120,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,3584,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,4096,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,3584,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,3072,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,2560,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,2048,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,1536,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,1024,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,12288,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,12288,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,16384,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,16384,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,16384,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,10240,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,12288,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,10240,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,10240,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,8192,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,8192,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,65536,0.06924799829721451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,7168,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,65536,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,65536,0.11753600090742111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,7168,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,6144,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,7168,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,6144,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,6144,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,5120,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,3584,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,3584,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,4096,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,3072,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,2560,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,2048,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,1024,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,1024,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,12288,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,12288,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,16384,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,16384,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,16384,0.058368001133203506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,12288,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,10240,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,10240,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,10240,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,8192,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,8192,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,65536,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,65536,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,7168,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,7168,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,8192,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,7168,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,5120,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,6144,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,5120,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,5120,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,4096,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,4096,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,65536,0.11683200299739838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,4096,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,3584,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,3584,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,3072,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,2048,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,2560,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,2048,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,1536,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,12288,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,12288,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,16384,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,16384,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,12288,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,16384,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,10240,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,10240,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,8192,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,8192,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,10240,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,8192,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,65536,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,65536,0.05443200096487999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,7168,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,6144,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,7168,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,5120,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,6144,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,6144,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,65536,0.11609599739313126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,5120,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,4096,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,7168,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,3584,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,3072,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,2560,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,2048,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,1536,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,12288,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,12288,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,16384,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,16384,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,12288,0.046271998435258865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,10240,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,10240,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,8192,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,8192,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,65536,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,8192,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,65536,0.05132799968123436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,7168,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,6144,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,6144,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,7168,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,5120,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,5120,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,5120,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,65536,0.11497599631547928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,4096,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,3584,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,3072,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,2560,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,2048,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,12288,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,16384,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,16384,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,16384,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,12288,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,12288,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,10240,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,10240,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,8192,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,65536,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,65536,0.056384000927209854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,8192,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,7168,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,6144,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,6144,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,7168,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,6144,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,5120,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,4096,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,4096,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,65536,0.11456000059843063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,3584,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,3072,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,2560,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,1536,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,12288,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,12288,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,16384,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,16384,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,16384,0.05648000165820122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,12288,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,10240,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,10240,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,8192,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,10240,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,65536,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,8192,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,65536,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,7168,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,8192,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,7168,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,6144,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,6144,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,5120,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,65536,0.1143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,3072,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,2048,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,1536,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,5120,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,12288,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,16384,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,12288,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,16384,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,16384,0.05721599981188774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,12288,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,10240,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,10240,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,10240,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,65536,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,8192,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,65536,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,65536,0.11564800143241882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,7168,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,8192,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,7168,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,6144,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,7168,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,6144,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,5120,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,5120,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,4096,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,4096,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,4096,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,3584,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,3072,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,1024,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,256,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,12288,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,16384,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,16384,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,16384,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,12288,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,10240,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,10240,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,65536,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,8192,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,10240,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,12288,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,65536,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,8192,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,7168,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,8192,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,6144,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,7168,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,6144,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,65536,0.11382400244474411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,6144,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,5120,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,5120,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,5120,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,4096,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,3584,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,3584,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,3072,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,3072,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,2560,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,2048,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,1536,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,128,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,12288,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,12288,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,16384,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,16384,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,16384,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,12288,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,10240,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,10240,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,10240,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,65536,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,65536,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,8192,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,7168,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,7168,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,7168,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,6144,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,6144,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,65536,0.11407999694347382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,5120,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,5120,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,4096,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,3584,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,3584,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,2560,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,2048,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,1024,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,768,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,512,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,64,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,12288,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,12288,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,16384,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,16384,0.055743999779224396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,10240,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,12288,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,10240,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,10240,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,8192,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,65536,0.04416000097990036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,65536,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,8192,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,7168,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,8192,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,7168,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,7168,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,6144,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,6144,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,65536,0.19817599654197693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,5120,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,4096,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,3584,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,3584,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,3072,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,2560,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,2048,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,1536,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,512,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,12288,0.1974399983882904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,16384,0.2633279860019684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,12288,0.21129600703716278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,10240,0.17852799594402313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,16384,0.2770560085773468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,12288,0.20156799256801605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,8192,0.13654400408267975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,16384,0.2593280076980591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,8192,0.1518079936504364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,10240,0.171424001455307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,7168,0.1207680031657219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,10240,0.16915200650691986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,8192,0.14099200069904327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,7168,0.1287360042333603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,7168,0.12319999933242798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,6144,0.11296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,5120,0.0907839983701706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,6144,0.1069440022110939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,5120,0.09663999825716019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,4096,0.07974400371313095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,4096,0.07427199929952621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,6144,0.10870400071144104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,5120,0.09388799965381622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,3584,0.06764800101518631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,3584,0.07158400118350983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,4096,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,3072,0.058848001062870026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,3072,0.06339199841022491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,3584,0.06790400296449661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,2560,0.05552000179886818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,2560,0.05238400027155876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,3072,0.058687999844551086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,2048,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,2048,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,1536,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,1536,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,2560,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,2048,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,1024,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,1536,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,1024,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,768,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,768,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,1024,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,512,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,512,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,768,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,256,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,512,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,256,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,128,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,256,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,128,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,64,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,64,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,32,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,64,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,32,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,32,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,12288,0.05580800026655197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,16384,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,12288,0.06083200126886368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,16384,0.07705599814653397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,16384,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,10240,0.05056000128388405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,10240,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,12288,0.05734400078654289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,10240,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,8192,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,8192,0.04809600114822388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,65536,0.2629440128803253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,7168,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,8192,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,7168,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,6144,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,7168,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,6144,0.046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,5120,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,6144,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,65536,0.2739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,5120,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,4096,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,4096,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,5120,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,4096,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,3584,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,3584,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,3072,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,3072,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,3584,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,65536,0.262688010931015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,3072,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,2560,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,2560,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,2560,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,2048,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,2048,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,2048,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,1536,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,1536,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,1536,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,1024,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,1024,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,512,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,256,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,512,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,12288,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,16384,0.07174400240182877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,16384,0.07472000271081924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,12288,0.05852799862623215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,16384,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,12288,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,10240,0.048287998884916306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,10240,0.04956800118088722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,8192,0.04287999868392944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,10240,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,65536,0.25836798548698425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,8192,0.04233599826693535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,7168,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,7168,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,8192,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,6144,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,65536,0.26892799139022827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,6144,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,7168,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,5120,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,6144,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,5120,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,5120,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,4096,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,4096,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,3584,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,4096,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,65536,0.206496000289917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,3584,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,3584,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,3072,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,3072,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,2560,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,3072,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,2560,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,2560,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,2048,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,2048,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,1536,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,1536,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,2048,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,1024,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,1536,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,1024,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,768,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,768,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,512,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,12288,0.05580800026655197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,12288,0.057312000542879105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,16384,0.07196799665689468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,16384,0.07494399696588516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,16384,0.05641600117087364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,12288,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,10240,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,10240,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,8192,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,10240,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,8192,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,65536,0.2606399953365326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,7168,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,7168,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,65536,0.267520010471344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,6144,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,7168,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,6144,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,5120,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,5120,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,4096,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,4096,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,5120,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,65536,0.1987839937210083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,3584,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,3584,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,3072,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,3584,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,3072,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,3072,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,2560,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,2560,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,2048,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,2048,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,1536,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,2048,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,1536,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,1024,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,1024,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,768,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,512,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,12288,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,12288,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,16384,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,16384,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,16384,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,12288,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,10240,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,10240,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,10240,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,65536,0.1380160003900528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,8192,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,8192,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,7168,0.03209599852561951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,7168,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,65536,0.15455999970436096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,6144,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,6144,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,7168,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,5120,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,5120,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,4096,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,65536,0.14761599898338318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,4096,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,3584,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,3072,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,2048,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,1536,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,2048,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,2048,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,768,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,32,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,12288,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,12288,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,16384,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,16384,0.046911999583244324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,16384,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,12288,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,10240,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,10240,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,8192,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,10240,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,65536,0.12188799679279327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,8192,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,7168,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,8192,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,7168,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,6144,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,6144,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,7168,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,5120,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,5120,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,6144,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,5120,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,4096,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,65536,0.1382399946451187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,4096,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,65536,0.1363839954137802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,3584,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,3072,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,2560,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,1536,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,12288,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,12288,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,16384,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,16384,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,16384,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,12288,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,10240,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,10240,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,10240,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,8192,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,8192,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,65536,0.1085439994931221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,8192,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,7168,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,65536,0.11926399916410446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,7168,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,6144,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,6144,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,6144,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,5120,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,5120,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,4096,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,4096,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,65536,0.13132800161838531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,3584,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,3584,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,2560,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,3072,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,2560,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,2048,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,2048,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,1024,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,1024,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,12288,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,16384,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,16384,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,12288,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,16384,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,12288,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,10240,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,10240,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,8192,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,10240,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,8192,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,65536,0.09247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,7168,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,8192,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,7168,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,7168,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,65536,0.10729599744081497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,6144,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,6144,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,6144,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,5120,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,4096,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,5120,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,65536,0.12387199699878693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,4096,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,4096,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,3072,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,2560,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,2048,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,12288,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,16384,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,16384,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,16384,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,12288,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,10240,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,10240,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,65536,0.07919999957084656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,10240,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,65536,0.08799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,12288,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,8192,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,8192,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,8192,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,7168,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,7168,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,6144,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,65536,0.11807999759912491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,5120,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,5120,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,4096,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,3584,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,2560,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,1536,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,1024,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,12288,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,12288,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,16384,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,16384,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,16384,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,12288,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,10240,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,10240,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,10240,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,8192,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,65536,0.08076799660921097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,65536,0.07091200351715088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,8192,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,7168,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,7168,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,5120,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,5120,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,6144,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,6144,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,4096,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,4096,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,65536,0.11667200177907944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,3584,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,3072,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,12288,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,16384,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,16384,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,12288,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,16384,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,12288,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,10240,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,10240,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,8192,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,65536,0.06480000168085098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,65536,0.07241600006818771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,8192,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,7168,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,7168,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,5120,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,65536,0.11612799763679504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,4096,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,3584,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,3584,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,3072,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,2560,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,1024,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,12288,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,12288,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,16384,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,16384,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,16384,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,12288,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,10240,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,10240,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,10240,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,65536,0.05827200040221214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,8192,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,65536,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,8192,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,7168,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,8192,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,7168,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,7168,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,6144,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,6144,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,6144,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,5120,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,65536,0.11535999923944473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,5120,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,4096,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,3584,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,3072,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,3584,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,5120,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,2048,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,1536,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,1024,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,12288,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,12288,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,16384,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,16384,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,16384,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,10240,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,12288,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,10240,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,8192,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,10240,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,8192,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,65536,0.0522879995405674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,7168,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,65536,0.058111999183893204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,7168,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,7168,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,6144,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,6144,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,5120,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,5120,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,4096,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,65536,0.11472000181674957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,3072,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,12288,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,16384,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,16384,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,16384,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,12288,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,10240,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,10240,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,65536,0.0522879995405674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,65536,0.06169600039720535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,8192,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,8192,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,10240,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,8192,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,7168,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,7168,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,6144,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,7168,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,6144,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,5120,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,65536,0.11382400244474411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,5120,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,4096,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,6144,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,5120,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,4096,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,3584,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,3072,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,12288,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,12288,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,16384,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,16384,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,16384,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,12288,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,10240,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,10240,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,8192,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,65536,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,65536,0.04531199857592583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,8192,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,7168,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,7168,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,6144,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,7168,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,6144,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,65536,0.11488000303506851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,5120,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,4096,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,3584,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,3072,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,2560,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,12288,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,12288,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,16384,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,16384,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,16384,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,12288,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,10240,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,10240,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,8192,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,10240,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,65536,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,65536,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,8192,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,7168,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,7168,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,8192,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,6144,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,6144,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,65536,0.1141119971871376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,5120,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,6144,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,3584,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,3072,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,2560,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,2048,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,64,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,12288,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,16384,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,16384,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,16384,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,12288,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,10240,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,10240,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,65536,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,65536,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,12288,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,8192,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,8192,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,7168,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,8192,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,7168,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,6144,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,7168,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,6144,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,65536,0.11296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,3584,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,3584,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,3072,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,2560,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,12288,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,16384,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,12288,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,16384,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,12288,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,10240,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,10240,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,10240,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,65536,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,65536,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,8192,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,7168,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,8192,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,7168,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,6144,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,6144,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,65536,0.20684799551963806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,6144,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,5120,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,5120,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,4096,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,3072,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,3584,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,3072,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,2560,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,2048,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,1536,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,1024,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,12288,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,12288,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,16384,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,16384,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,16384,0.05686400085687637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,12288,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,10240,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,10240,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,65536,0.07203199714422226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,10240,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,8192,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,65536,0.049056001007556915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,8192,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,7168,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,8192,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,7168,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,6144,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,7168,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,6144,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,65536,0.2070080041885376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,6144,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,3584,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,3584,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,3072,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,2560,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,2560,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,2048,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,1024,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,32,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,12288,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,12288,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,16384,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,16384,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,16384,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,12288,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,10240,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,10240,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,10240,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,8192,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,8192,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,65536,0.04931199923157692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,65536,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,8192,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,7168,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,7168,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,6144,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,5120,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,65536,0.2056639939546585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,6144,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,5120,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,3584,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,3584,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,3072,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,7168,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,3072,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,2048,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,768,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,64,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,32,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,16384,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,12288,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,16384,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,16384,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,12288,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,10240,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,10240,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,10240,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,8192,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,65536,0.04940799996256828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,65536,0.04806400090456009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,8192,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,8192,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,7168,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,7168,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,65536,0.1995840072631836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,6144,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,5120,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,5120,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,4096,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,3584,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,3072,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,3584,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,3072,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,2560,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,2048,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,512,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,128,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,12288,0.19222399592399597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,16384,0.2542079985141754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,12288,0.21011200547218323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,10240,0.1756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,10240,0.16070400178432465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,16384,0.27801600098609924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,8192,0.1319359987974167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,12288,0.19923199713230133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,8192,0.14470399916172028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,16384,0.2580159902572632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,7168,0.12540799379348755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,7168,0.11497599631547928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,10240,0.17023999989032745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,8192,0.13635200262069702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,6144,0.10198400169610977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,6144,0.11212799698114395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,7168,0.12284799665212631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,5120,0.08665599673986435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,5120,0.09539200365543365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,6144,0.10707200318574905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,4096,0.07196799665689468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,4096,0.07872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,5120,0.09296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,3584,0.06976000219583511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,3584,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,4096,0.07420799881219864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,3072,0.06108799949288368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,3072,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,3584,0.06588800251483917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,2560,0.05452800169587135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,2560,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,3072,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,2048,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,2048,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,2560,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,1536,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,2048,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,1536,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,1024,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,1024,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,1536,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,1024,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,768,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,768,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,768,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,512,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,512,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,256,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,256,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,512,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,128,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,128,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,64,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,64,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,32,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,32,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,32,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,12288,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,12288,0.06092799827456474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,16384,0.0719359964132309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,16384,0.07875200361013412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,16384,0.06886400282382965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,10240,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,10240,0.0644799992442131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,12288,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,8192,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,10240,0.05110400170087814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,8192,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,7168,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,65536,0.2534720003604889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,8192,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,7168,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,6144,0.037408001720905304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,6144,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,7168,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,5120,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,5120,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,6144,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,4096,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,65536,0.2741760015487671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,4096,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,5120,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,3584,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,4096,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,65536,0.26185598969459534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,3584,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,3072,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,3584,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,2560,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,2560,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,3072,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,2048,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,2560,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,2048,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,1536,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,2048,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,1536,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,1536,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,1024,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,1024,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,768,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,1024,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,512,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,3072,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,12288,0.05423999950289726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,12288,0.05958399921655655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,16384,0.0753600001335144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,16384,0.07203199714422226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,16384,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,10240,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,12288,0.04729599878191948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,10240,0.049247998744249344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,10240,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,8192,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,8192,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,7168,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,65536,0.2568320035934448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,8192,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,7168,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,6144,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,6144,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,7168,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,5120,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,65536,0.26979199051856995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,5120,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,6144,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,5120,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,4096,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,4096,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,3584,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,3584,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,65536,0.2024639993906021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,3584,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,3072,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,2560,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,3072,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,2560,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,2560,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,2048,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,2048,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,2048,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,1536,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,1536,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,1024,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,1024,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,12288,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,16384,0.07267200201749802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,16384,0.07459200173616409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,16384,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,12288,0.04447999969124794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,10240,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,10240,0.046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,12288,0.054207999259233475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,10240,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,8192,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,8192,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,65536,0.25440001487731934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,7168,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,8192,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,65536,0.2698880136013031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,7168,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,6144,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,6144,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,5120,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,5120,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,4096,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,4096,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,5120,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,3584,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,3584,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,3584,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,3072,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,3072,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,65536,0.1902720034122467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,2560,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,3072,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,2560,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,2560,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,2048,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,2048,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,1536,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,1536,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,2048,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,1024,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,1024,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,768,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,12288,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,12288,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,16384,0.0469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,16384,0.05110400170087814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,16384,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,10240,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,12288,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,10240,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,10240,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,8192,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,65536,0.1345600038766861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,7168,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,8192,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,7168,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,65536,0.15033599734306335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,6144,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,6144,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,5120,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,5120,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,6144,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,5120,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,4096,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,3584,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,65536,0.1462080031633377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,3584,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,3072,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,2560,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,2560,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,512,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,768,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,12288,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,12288,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,16384,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,16384,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,16384,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,10240,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,12288,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,10240,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,10240,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,8192,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,7168,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,7168,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,8192,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,65536,0.1228799968957901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,65536,0.13654400408267975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,6144,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,7168,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,6144,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,5120,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,5120,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,6144,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,5120,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,3584,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,65536,0.13715200126171112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,3072,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,2048,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,12288,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,12288,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,16384,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,16384,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,16384,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,10240,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,12288,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,10240,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,10240,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,8192,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,8192,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,65536,0.10790400207042694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,65536,0.1188800036907196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,8192,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,7168,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,6144,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,7168,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,6144,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,6144,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,5120,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,4096,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,5120,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,65536,0.1289920061826706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,7168,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,3072,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,2048,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,1536,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,768,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,128,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,12288,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,12288,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,16384,0.03596799820661545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,16384,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,16384,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,10240,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,12288,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,10240,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,10240,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,8192,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,8192,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,8192,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,7168,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,65536,0.10335999727249146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,65536,0.09145600348711014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,7168,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,6144,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,7168,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,6144,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,5120,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,6144,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,5120,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,5120,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,4096,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,65536,0.12307199835777283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,3072,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,3072,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,2560,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,1536,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,1024,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,768,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,12288,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,16384,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,16384,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,12288,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,16384,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,10240,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,12288,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,10240,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,8192,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,10240,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,8192,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,65536,0.07568000257015228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,65536,0.08511999994516373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,8192,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,7168,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,7168,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,6144,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,6144,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,6144,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,65536,0.11689600348472595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,3584,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,3072,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,2048,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,1536,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,1024,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,12288,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,12288,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,16384,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,16384,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,16384,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,12288,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,10240,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,10240,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,10240,0.042688000947237015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,8192,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,8192,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,65536,0.06748799979686737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,7168,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,65536,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,8192,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,7168,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,6144,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,5120,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,6144,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,5120,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,65536,0.11475200206041336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,3072,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,2560,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,1024,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,3584,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,12288,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,12288,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,16384,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,16384,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,16384,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,12288,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,10240,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,10240,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,10240,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,8192,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,8192,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,7168,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,8192,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,65536,0.06361600011587143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,65536,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,7168,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,7168,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,6144,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,6144,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,6144,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,5120,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,65536,0.11503999680280685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,3584,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,3584,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,2560,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,1536,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,12288,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,12288,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,16384,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,16384,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,12288,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,10240,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,10240,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,10240,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,8192,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,8192,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,65536,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,8192,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,65536,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,7168,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,16384,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,7168,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,7168,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,6144,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,6144,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,5120,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,5120,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,65536,0.11430399864912033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,3072,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,2560,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,1536,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,1024,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,1024,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,12288,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,16384,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,12288,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,16384,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,16384,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,12288,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,10240,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,10240,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,10240,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,8192,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,8192,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,7168,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,65536,0.04927999898791313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,65536,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,7168,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,7168,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,5120,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,6144,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,5120,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,65536,0.11276800185441971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,4096,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,4096,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,3584,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,3072,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,3072,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,2560,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,2048,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,12288,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,12288,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,16384,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,16384,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,16384,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,12288,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,10240,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,8192,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,8192,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,65536,0.042688000947237015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,65536,0.05081599950790405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,8192,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,6144,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,7168,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,5120,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,6144,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,5120,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,65536,0.11273600161075592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,4096,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,3584,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,2560,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,12288,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,12288,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,16384,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,16384,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,16384,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,10240,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,12288,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,10240,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,8192,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,8192,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,65536,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,7168,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,65536,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,8192,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,7168,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,7168,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,5120,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,6144,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,6144,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,5120,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,65536,0.11321599781513214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,10240,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,4096,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,3072,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,2560,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,2048,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,1536,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,12288,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,12288,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,16384,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,16384,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,12288,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,10240,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,10240,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,10240,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,8192,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,8192,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,7168,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,65536,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,8192,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,65536,0.04662400111556053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,7168,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,6144,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,7168,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,6144,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,5120,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,65536,0.11193600296974182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,4096,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,3584,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,2560,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,2048,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,1536,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,12288,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,16384,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,16384,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,16384,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,12288,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,12288,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,10240,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,10240,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,8192,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,10240,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,65536,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,65536,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,8192,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,7168,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,8192,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,7168,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,6144,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,6144,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,7168,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,6144,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,65536,0.11321599781513214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,4096,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,3584,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,2560,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,2048,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,2048,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,1536,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,12288,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,12288,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,16384,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,16384,0.05648000165820122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,16384,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,12288,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,10240,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,10240,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,8192,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,10240,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,7168,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,8192,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,65536,0.07494399696588516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,65536,0.07379200309515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,7168,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,7168,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,6144,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,6144,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,5120,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,6144,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,65536,0.14368000626564026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,4096,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,4096,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,3584,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,3584,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,3072,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,3584,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,2560,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,1536,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,1024,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,12288,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,12288,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,16384,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,16384,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,16384,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,12288,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,10240,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,10240,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,10240,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,8192,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,8192,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,65536,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,7168,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,65536,0.07222399860620499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,8192,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,7168,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,5120,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,7168,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,6144,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,5120,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,65536,0.14351999759674072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,4096,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,3072,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,1536,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,256,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,128,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,12288,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,12288,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,16384,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,16384,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,12288,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,10240,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,10240,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,8192,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,8192,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,65536,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,16384,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,65536,0.07513599842786789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,8192,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,7168,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,7168,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,6144,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,6144,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,7168,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,65536,0.14323200285434723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,6144,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,5120,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,4096,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,4096,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,5120,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,3584,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,3584,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,2560,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,3072,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,2560,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,2048,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,1024,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,768,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,256,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,128,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,12288,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,12288,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,16384,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,16384,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,16384,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,12288,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,10240,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,10240,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,10240,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,8192,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,8192,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,65536,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,7168,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,65536,0.0708480030298233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,8192,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,7168,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,7168,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,6144,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,6144,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,65536,0.19974400103092194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,5120,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,3584,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,3584,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,4096,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,3584,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,3072,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,2560,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,2048,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,1536,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,768,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,12288,0.1924159973859787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,10240,0.16150400042533875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,16384,0.25331199169158936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,12288,0.21456000208854675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,10240,0.17849600315093994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,12288,0.1971839964389801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,16384,0.27692800760269165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,8192,0.13257600367069244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,16384,0.2576639950275421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,8192,0.14774399995803833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,7168,0.12809599936008453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,7168,0.11628799885511398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,10240,0.16787199676036835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,8192,0.13657599687576294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,6144,0.10198400169610977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,6144,0.11302399635314941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,7168,0.12105599790811539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,5120,0.08659200370311737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,5120,0.09715200215578079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,4096,0.07996799796819687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,6144,0.1064319983124733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,4096,0.07187200337648392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,3584,0.06387200206518173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,5120,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,3584,0.07075200229883194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,4096,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,3072,0.05686400085687637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,3072,0.06278400123119354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,3072,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,3584,0.0647680014371872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,2560,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,2560,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,2048,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,2048,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,2560,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,1536,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,2048,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,1536,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,1024,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,1536,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,1024,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,768,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,1024,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,768,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,512,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,512,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,768,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,512,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,256,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,256,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,128,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,256,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,128,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,64,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,64,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,32,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,32,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,12288,0.06163199990987778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,16384,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,16384,0.0783040001988411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,16384,0.07308799773454666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,10240,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,12288,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,10240,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,12288,0.05478399991989136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,10240,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,8192,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,8192,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,65536,0.25331199169158936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,8192,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,7168,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,7168,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,6144,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,6144,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,7168,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,6144,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,5120,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,65536,0.2752639949321747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,5120,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,4096,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,4096,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,5120,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,4096,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,3584,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,3584,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,3072,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,3584,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,65536,0.2550080120563507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,3072,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,3072,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,2560,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,2560,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,2048,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,2560,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,2048,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,1536,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,2048,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,1536,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,1024,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,1024,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,1536,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,768,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,1024,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,256,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,12288,0.05443200096487999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,12288,0.05830400064587593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,16384,0.06918399780988693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,16384,0.07443200051784515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,16384,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,12288,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,10240,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,10240,0.04668800160288811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,10240,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,8192,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,8192,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,8192,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,7168,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,65536,0.25299200415611267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,7168,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,6144,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,6144,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,7168,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,65536,0.2682879865169525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,5120,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,6144,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,5120,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,5120,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,4096,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,4096,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,4096,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,3584,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,3584,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,3584,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,65536,0.19939200580120087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,3072,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,3072,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,2560,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,2560,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,2048,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,2048,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,2560,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,1536,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,1536,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,2048,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,1024,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,512,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,32,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,12288,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,16384,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,16384,0.0727040022611618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,12288,0.05411199852824211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,16384,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,12288,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,10240,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,10240,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,8192,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,10240,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,8192,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,65536,0.2524160146713257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,7168,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,7168,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,7168,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,6144,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,6144,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,65536,0.2683520019054413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,5120,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,6144,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,5120,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,4096,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,4096,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,3584,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,4096,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,65536,0.18751999735832214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,3584,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,3584,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,3072,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,2560,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,3072,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,2560,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,2048,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,2560,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,2048,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,1536,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,1536,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,512,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,12288,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,12288,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,16384,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,16384,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,16384,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,12288,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,10240,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,10240,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,10240,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,8192,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,8192,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,7168,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,8192,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,7168,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,65536,0.13446399569511414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,65536,0.15007999539375305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,6144,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,6144,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,5120,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,5120,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,65536,0.14643199741840363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,3072,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,2560,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,6144,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,12288,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,12288,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,16384,0.04729599878191948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,16384,0.04179200157523155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,16384,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,10240,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,12288,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,10240,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,10240,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,8192,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,8192,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,65536,0.11798399686813354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,7168,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,8192,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,7168,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,65536,0.13337600231170654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,7168,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,6144,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,6144,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,6144,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,5120,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,5120,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,65536,0.1353279948234558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,5120,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,4096,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,3584,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,2560,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,12288,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,16384,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,16384,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,16384,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,12288,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,12288,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,10240,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,10240,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,8192,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,10240,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,8192,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,65536,0.10524799674749374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,8192,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,7168,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,65536,0.11686400324106216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,6144,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,7168,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,5120,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,65536,0.12883199751377106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,4096,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,3584,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,3584,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,3072,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,1536,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,1024,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,12288,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,12288,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,16384,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,16384,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,16384,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,12288,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,10240,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,10240,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,10240,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,8192,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,8192,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,7168,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,8192,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,65536,0.0931520015001297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,65536,0.10214400291442871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,7168,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,7168,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,6144,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,6144,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,5120,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,6144,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,4096,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,4096,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,5120,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,4096,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,3584,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,3584,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,65536,0.12156800180673599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,3072,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,3072,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,12288,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,12288,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,16384,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,16384,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,16384,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,12288,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,10240,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,10240,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,10240,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,8192,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,8192,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,7168,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,8192,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,65536,0.07756800204515457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,65536,0.08390399813652039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,7168,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,7168,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,6144,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,6144,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,5120,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,5120,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,6144,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,5120,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,4096,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,4096,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,4096,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,3584,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,65536,0.11638399958610535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,3584,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,3072,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,3072,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,2560,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,2560,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,2048,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,1536,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,12288,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,16384,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,16384,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,12288,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,10240,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,10240,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,8192,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,10240,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,65536,0.06777600198984146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,12288,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,8192,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,65536,0.07766400277614594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,8192,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,7168,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,7168,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,6144,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,7168,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,6144,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,5120,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,5120,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,5120,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,65536,0.11433599889278412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,4096,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,4096,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,3584,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,4096,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,3072,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,3072,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,3072,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,2560,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,2560,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,2560,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,2048,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,2048,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,1536,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,12288,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,12288,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,16384,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,16384,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,16384,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,12288,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,10240,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,10240,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,10240,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,8192,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,8192,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,65536,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,7168,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,8192,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,65536,0.06255999952554703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,7168,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,6144,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,7168,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,6144,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,5120,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,5120,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,6144,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,4096,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,65536,0.11407999694347382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,4096,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,3584,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,3584,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,4096,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,3072,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,3584,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,3072,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,2560,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,2048,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,2560,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,1536,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,1024,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,12288,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,16384,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,16384,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,12288,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,16384,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,12288,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,10240,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,10240,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,10240,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,8192,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,8192,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,65536,0.06259199976921082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,65536,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,7168,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,7168,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,8192,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,7168,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,6144,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,6144,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,5120,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,6144,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,5120,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,5120,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,65536,0.11257600039243698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,4096,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,3584,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,4096,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,4096,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,3584,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,3584,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,3072,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,3072,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,3072,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,2560,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,2560,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,2560,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,2048,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,2048,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,2048,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,1536,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,768,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,12288,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,12288,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,16384,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,16384,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,16384,0.05852799862623215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,12288,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,10240,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,10240,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,10240,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,8192,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,8192,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,65536,0.04899200052022934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,65536,0.05734400078654289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,8192,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,7168,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,7168,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,6144,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,5120,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,5120,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,4096,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,4096,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,65536,0.11184000223875046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,4096,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,3584,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,3584,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,3072,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,7168,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,3072,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,2560,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,2560,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,2560,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,2048,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,2048,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,1024,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,768,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,768,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,512,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,64,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,32,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,12288,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,12288,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,16384,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,16384,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,16384,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,12288,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,10240,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,10240,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,8192,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,10240,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,8192,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,65536,0.050655998289585114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,65536,0.04358400031924248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,7168,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,8192,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,7168,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,7168,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,6144,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,6144,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,5120,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,6144,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,5120,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,4096,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,4096,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,5120,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,4096,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,3584,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,65536,0.11247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,3584,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,3584,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,3072,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,3072,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,2560,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,2560,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,2048,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,2048,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,1536,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,2048,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,1024,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,1536,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,1536,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,1024,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,768,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,128,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,12288,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,16384,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,16384,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,16384,0.0424639992415905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,12288,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,64,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,10240,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,12288,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,10240,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,8192,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,65536,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,10240,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,65536,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,8192,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,8192,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,7168,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,7168,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,6144,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,6144,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,7168,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,5120,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,65536,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,5120,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,6144,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,4096,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,4096,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,3584,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,4096,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,3584,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,3584,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,3072,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,3072,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,3072,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,2560,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,2560,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,2560,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,2048,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,2048,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,2048,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,1536,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,1536,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,1024,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,768,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,512,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,64,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,32,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,12288,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,12288,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,16384,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,16384,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,16384,0.057151999324560165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,12288,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,10240,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,10240,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,10240,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,8192,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,8192,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,8192,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,65536,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,7168,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,65536,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,7168,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,6144,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,7168,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,5120,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,6144,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,6144,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,5120,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,4096,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,4096,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,65536,0.1120000034570694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,3584,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,3584,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,4096,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,3072,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,3584,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,2560,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,3072,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,2560,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,2048,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,2048,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,1536,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,1536,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,1024,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,1024,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,768,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,768,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,512,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,3072,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,32,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,12288,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,12288,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,16384,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,16384,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,16384,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,12288,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,10240,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,10240,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,8192,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,8192,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,65536,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,7168,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,65536,0.04275200143456459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,8192,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,7168,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,6144,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,7168,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,5120,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,5120,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,6144,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,65536,0.11203200370073318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,4096,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,4096,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,3584,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,3584,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,3072,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,3584,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,4096,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,3072,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,2560,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,3072,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,2560,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,2048,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,2048,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,2560,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,2048,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,1536,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,1536,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,1024,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,1024,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,1536,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,512,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,64,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,12288,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,12288,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,16384,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,16384,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,12288,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,10240,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,10240,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,10240,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,8192,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,8192,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,65536,0.051231998950242996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,65536,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,8192,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,16384,0.05753599852323532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,7168,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,7168,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,6144,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,6144,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,5120,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,7168,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,6144,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,5120,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,65536,0.1430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,5120,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,4096,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,3584,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,4096,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,3584,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,3072,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,3584,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,3072,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,2560,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,3072,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,2560,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,2560,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,2048,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,2048,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,1536,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,1536,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,1024,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,1536,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,768,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,768,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,512,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,256,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,32,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,12288,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,12288,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,16384,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,16384,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,16384,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,12288,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,10240,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,10240,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,10240,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,8192,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,8192,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,65536,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,7168,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,8192,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,65536,0.050655998289585114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,7168,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,7168,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,6144,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,6144,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,5120,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,6144,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,5120,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,65536,0.14287999272346497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,4096,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,4096,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,4096,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,3584,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,3072,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,3584,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,3584,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,3072,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,2560,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,3072,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,2560,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,2048,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,2048,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,1536,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,1536,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,1536,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,1024,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,768,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,768,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,512,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,256,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,1024,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,12288,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,12288,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,16384,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,16384,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,16384,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,12288,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,10240,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,10240,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,8192,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,10240,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,65536,0.051392000168561935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,8192,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,7168,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,8192,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,65536,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,7168,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,6144,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,6144,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,7168,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,6144,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,5120,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,5120,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,4096,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,65536,0.14268800616264343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,4096,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,5120,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,4096,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,3584,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,3584,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,3584,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,3072,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,3072,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,2560,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,3072,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,2560,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,2048,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,2048,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,1536,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,2048,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,1536,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,1536,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,1024,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,1024,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,768,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,768,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,768,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,512,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,256,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,32,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,32,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,12288,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,12288,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,16384,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,16384,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,16384,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,10240,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,12288,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,10240,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,8192,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,65536,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,10240,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,7168,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,8192,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,65536,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,7168,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,6144,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,6144,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,7168,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,6144,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,5120,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,65536,0.11238399893045425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,4096,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,5120,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,4096,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,5120,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,8192,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,3584,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,3584,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,4096,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,3072,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,3072,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,3584,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,2560,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,3072,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,2560,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,2560,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,2048,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,2048,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,1536,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,1536,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,2048,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,1024,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,1024,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,1536,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,768,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,768,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,256,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,512,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,512,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,256,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,128,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,12288,0.190528005361557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,10240,0.16140800714492798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,16384,0.2537600100040436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,10240,0.1728000044822693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,12288,0.20342400670051575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,12288,0.1894720047712326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,16384,0.27136000990867615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,8192,0.1327359974384308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,8192,0.1416960060596466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,16384,0.2515200078487396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,7168,0.1316480040550232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,7168,0.11615999788045883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,10240,0.1590079963207245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,8192,0.1316159963607788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,6144,0.1013759970664978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,7168,0.11971200257539749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,6144,0.11081600189208984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,5120,0.08710400015115738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,5120,0.09718400239944458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,4096,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,4096,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,6144,0.10182400047779083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,5120,0.09196799993515015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,3584,0.0644799992442131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,4096,0.07452800124883652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,3584,0.07123199850320816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,3072,0.05663999915122986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,3072,0.06297600269317627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,3584,0.06943999975919724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,2560,0.05539200082421303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,2560,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,3072,0.06496000289916992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,2048,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,2048,0.0416640006005764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,2560,0.060256000608205795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,1536,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,1536,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,2048,0.04956800118088722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,1536,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,1024,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,1024,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,768,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,1024,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,768,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,512,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,768,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,512,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,256,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,256,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,512,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,128,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,256,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,128,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,64,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,64,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,32,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,64,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,32,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,32,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,12288,0.06028800085186958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,16384,0.07023999840021133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,128,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,16384,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,16384,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,12288,0.05475199967622757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,12288,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,10240,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,10240,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,10240,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,8192,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,8192,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,7168,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,8192,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,65536,0.25251200795173645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,7168,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,7168,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,6144,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,6144,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,5120,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,65536,0.273824006319046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,5120,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,6144,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,4096,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,4096,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,5120,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,3584,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,3584,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,4096,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,3072,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,65536,0.2905920147895813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,3584,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,2560,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,3072,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,3072,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,2560,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,2048,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,2048,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,2560,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,2048,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,1536,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,1536,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,1536,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,1024,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,1024,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,768,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,1024,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,768,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,512,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,256,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,128,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,64,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,32,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,32,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,32,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,12288,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,12288,0.04694399982690811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,16384,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,16384,0.06111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,16384,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,12288,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,10240,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,10240,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,8192,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,10240,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,8192,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,7168,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,8192,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,7168,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,65536,0.19123199582099915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,7168,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,6144,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,6144,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,5120,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,65536,0.2160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,6144,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,5120,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,5120,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,4096,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,4096,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,3584,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,3584,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,65536,0.23494400084018707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,3072,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,3072,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,2560,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,2560,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,2560,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,2048,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,2048,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,1536,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,1536,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,1024,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,1024,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,768,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,1024,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,512,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,256,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,128,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,12288,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,12288,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,16384,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,16384,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,16384,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,12288,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,10240,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,10240,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,10240,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,8192,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,8192,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,8192,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,65536,0.1621759980916977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,7168,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,6144,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,7168,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,65536,0.1987520009279251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,5120,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,5120,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,5120,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,3584,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,4096,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,65536,0.19616000354290009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,2560,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,2560,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,2048,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,1536,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,1024,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,1024,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,768,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,512,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,128,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,256,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,32,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,12288,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,12288,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,16384,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,16384,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,10240,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,12288,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,10240,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,10240,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,8192,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,8192,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,8192,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,7168,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,65536,0.1318719983100891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,16384,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,65536,0.15004800260066986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,6144,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,6144,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,5120,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,4096,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,4096,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,65536,0.15625600516796112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,3584,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,4096,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,3584,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,3072,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,3584,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,3072,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,3072,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,2560,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,2048,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,1536,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,768,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,256,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,64,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,64,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,32,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,32,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,12288,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,12288,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,16384,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,16384,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,16384,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,12288,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,10240,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,10240,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,8192,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,8192,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,7168,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,65536,0.11795199662446976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,8192,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,7168,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,65536,0.13808000087738037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,6144,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,7168,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,6144,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,6144,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,4096,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,4096,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,5120,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,3584,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,65536,0.1520960032939911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,4096,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,3584,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,3072,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,3072,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,2560,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,3072,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,2560,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,2048,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,1536,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,64,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,2048,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,64,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,32,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,32,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,12288,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,12288,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,16384,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,16384,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,16384,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,12288,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,10240,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,8192,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,7168,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,8192,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,7168,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,65536,0.104032002389431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,65536,0.11856000125408173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,6144,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,6144,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,7168,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,6144,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,5120,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,5120,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,4096,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,4096,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,4096,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,3584,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,65536,0.1138560026884079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,3584,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,3072,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,3072,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,3584,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,2560,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,3072,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,2560,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,2048,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,2048,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,2560,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,2048,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,1536,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,1536,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,64,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,32,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,64,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,64,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,32,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,32,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,12288,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,12288,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,16384,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,16384,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,16384,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,12288,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,10240,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,10240,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,8192,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,8192,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,65536,0.08848000317811966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,7168,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,8192,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,7168,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,65536,0.10038399696350098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,7168,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,6144,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,6144,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,10240,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,5120,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,6144,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,5120,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,5120,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,4096,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,4096,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,65536,0.1098560020327568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,3584,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,4096,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,3584,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,3584,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,3072,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,3072,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,2560,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,2560,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,2560,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,3072,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,2048,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,2048,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,1536,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,1536,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,1024,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,768,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,128,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,64,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,64,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,64,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,32,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,32,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,32,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,12288,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,12288,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,16384,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,16384,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,16384,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,10240,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,12288,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,10240,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,8192,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,10240,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,8192,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,8192,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,7168,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,65536,0.07529599964618683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,65536,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,7168,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,6144,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,6144,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,7168,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,5120,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,5120,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,6144,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,5120,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,4096,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,4096,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,4096,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,65536,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,3584,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,3584,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,3584,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,3072,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,3072,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,3072,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,2560,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,2560,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,2048,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,2048,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,2560,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,1536,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,2048,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,1536,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,1024,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,768,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,512,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,512,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,64,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,32,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,12288,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,16384,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,16384,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,16384,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,12288,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,10240,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,10240,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,12288,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,8192,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,10240,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,8192,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,7168,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,65536,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,8192,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,65536,0.07516799867153168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,7168,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,6144,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,6144,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,7168,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,5120,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,6144,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,5120,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,5120,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,4096,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,4096,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,65536,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,3584,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,3584,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,4096,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,3072,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,3584,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,3072,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,3072,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,2560,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,2560,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,2048,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,2048,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,2560,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,2048,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,1536,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,1536,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,1024,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,1536,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,1024,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,768,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,768,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,512,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,512,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,64,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,32,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,12288,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,12288,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,16384,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,16384,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,16384,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,12288,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,10240,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,10240,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,8192,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,10240,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,8192,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,7168,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,65536,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,8192,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,65536,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,7168,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,7168,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,6144,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,6144,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,5120,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,5120,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,4096,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,6144,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,5120,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,4096,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,3584,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,65536,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,3584,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,3584,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,3072,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,3072,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,3072,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,2560,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,2560,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,2560,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,2048,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,2048,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,2048,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,1536,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,1536,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,1536,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,1024,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,4096,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,768,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,768,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,512,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,768,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,512,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,256,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,512,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,12288,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,12288,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,16384,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,16384,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,16384,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,12288,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,10240,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,10240,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,10240,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,8192,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,8192,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,65536,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,65536,0.06159999966621399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,7168,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,7168,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,8192,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,6144,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,7168,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,6144,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,5120,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,5120,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,4096,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,6144,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,4096,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,65536,0.06854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,5120,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,3584,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,3584,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,4096,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,3584,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,3072,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,3072,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,3072,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,2560,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,2560,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,2560,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,2048,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,2048,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,2048,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,1536,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,1536,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,1024,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,1024,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,768,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,1024,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,768,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,1536,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,768,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,512,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,256,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,512,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,512,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,12288,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,16384,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,16384,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,12288,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,12288,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,10240,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,10240,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,8192,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,10240,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,65536,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,8192,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,65536,0.05593600124120712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,8192,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,16384,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,7168,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,7168,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,6144,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,7168,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,6144,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,65536,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,5120,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,5120,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,4096,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,5120,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,4096,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,4096,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,3584,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,3584,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,3072,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,3584,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,3072,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,2560,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,3072,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,2560,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,2048,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,2048,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,1536,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,2048,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,1536,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,1536,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,1024,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,1024,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,1024,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,768,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,768,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,768,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,512,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,256,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,256,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,128,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,64,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,32,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,12288,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,12288,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,16384,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,16384,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,16384,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,12288,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,10240,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,10240,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,10240,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,8192,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,8192,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,65536,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,65536,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,7168,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,8192,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,7168,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,6144,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,7168,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,6144,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,6144,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,5120,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,5120,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,5120,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,4096,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,4096,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,4096,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,65536,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,3584,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,3584,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,3584,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,3072,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,3072,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,2560,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,3072,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,2560,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,2560,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,2048,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,2048,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,1536,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,1536,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,1536,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,1024,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,768,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,512,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,768,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,512,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,512,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,256,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,256,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,2048,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,256,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,128,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,64,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,32,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,32,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,12288,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,12288,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,16384,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,16384,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,16384,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,12288,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,10240,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,10240,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,10240,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,8192,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,8192,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,65536,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,7168,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,65536,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,8192,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,7168,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,7168,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,6144,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,6144,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,5120,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,6144,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,5120,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,4096,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,5120,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,4096,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,65536,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,4096,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,3584,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,3584,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,3584,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,3072,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,3072,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,2560,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,2560,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,3072,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,2048,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,2560,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,2048,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,2048,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,1536,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,1536,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,1024,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,1536,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,1024,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,768,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,768,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,768,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,512,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,512,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,256,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,256,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,128,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,64,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,32,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,12288,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,16384,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,16384,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,16384,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,12288,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,10240,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,10240,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,8192,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,10240,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,65536,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,8192,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,12288,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,65536,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,8192,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,7168,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,6144,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,7168,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,6144,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,7168,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,5120,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,6144,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,65536,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,4096,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,4096,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,5120,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,4096,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,5120,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,3584,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,3584,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,3584,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,3072,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,3072,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,2560,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,3072,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,2560,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,2048,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,2560,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,1536,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,1536,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,1024,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,1024,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,1024,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,1536,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,768,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,512,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,768,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,768,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,512,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,256,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,128,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,128,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,64,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,64,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,32,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,12288,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,12288,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,16384,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,16384,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,16384,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,12288,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,10240,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,10240,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,8192,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,10240,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,8192,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,65536,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,8192,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,7168,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,65536,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,7168,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,7168,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,6144,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,6144,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,5120,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,5120,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,6144,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,5120,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,4096,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,4096,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,4096,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,65536,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,3584,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,3584,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,3072,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,3072,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,3584,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,3072,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,2560,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,2560,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,2048,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,2560,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,2048,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,1536,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,2048,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,1536,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,1536,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,1024,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,1024,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,1024,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,768,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,768,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,768,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,512,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,256,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,512,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,256,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,128,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,256,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,64,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,64,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,32,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,12288,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,16384,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,16384,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,12288,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,16384,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,12288,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,10240,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,10240,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,8192,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,65536,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,10240,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,8192,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,65536,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,7168,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,8192,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,7168,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,6144,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,7168,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,5120,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,6144,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,6144,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,5120,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,65536,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,4096,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,4096,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,5120,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,4096,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,3584,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,3584,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,3584,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,3072,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,2560,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,3072,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,2048,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,2560,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,2560,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,1536,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,2048,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,1536,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,1536,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,1024,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,1024,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,1024,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,768,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,768,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,512,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,768,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,256,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,512,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,256,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,128,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,64,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,32,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,64,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,32,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,12288,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,16384,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,12288,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,16384,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,16384,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,12288,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,10240,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,10240,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,8192,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,10240,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,8192,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,65536,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,8192,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,7168,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,65536,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,7168,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,6144,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,6144,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,7168,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,5120,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,6144,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,65536,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,5120,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,4096,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,4096,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,3584,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,3584,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,4096,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,3072,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,3584,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,3072,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,2560,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,2560,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,3072,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,2560,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,2048,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,2048,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,1536,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,2048,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,1536,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,5120,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,1536,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,1024,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,1024,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,1024,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,768,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,768,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,512,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,512,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,768,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,512,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,256,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,128,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,256,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,128,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,64,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,64,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,32,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,12288,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,12288,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,16384,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,16384,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,16384,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,12288,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,10240,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,10240,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,10240,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,8192,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,8192,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,65536,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,65536,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,8192,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,7168,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,7168,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,6144,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,7168,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,6144,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,5120,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,6144,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,5120,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,4096,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,65536,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,5120,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,4096,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,4096,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,3584,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,3584,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,3584,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,3072,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,3072,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,2560,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,2560,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,2560,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,2048,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,1536,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,2048,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,2048,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,1536,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,1024,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,1536,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,1024,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,768,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,1024,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,768,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,768,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,512,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,512,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,512,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,256,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,256,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,256,0.005663999821990728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,128,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,64,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,64,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,32,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,12288,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,16384,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,16384,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,64,0.0054720002226531506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,12288,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,16384,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,12288,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,10240,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,8192,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,10240,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,65536,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,10240,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,65536,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,8192,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,8192,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,7168,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,7168,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,6144,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,7168,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,6144,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,5120,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,6144,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,65536,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,5120,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,5120,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,4096,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,4096,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,4096,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,3584,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,3584,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,3072,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,3584,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,3072,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,3072,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,2560,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,2560,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,2048,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,1536,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,1536,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,2048,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,2048,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,1536,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,1024,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,1024,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,1024,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,768,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,768,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,512,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,768,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,512,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,512,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,256,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,256,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,256,0.005727999843657017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,128,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,128,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,32,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,64,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,32,0.005408000200986862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,12288,0.12649600207805634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,10240,0.11078400164842606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,16384,0.16387200355529785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,12288,0.18783999979496002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,10240,0.16022400557994843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,12288,0.1860159933567047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,16384,0.2465600073337555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,8192,0.08761599659919739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,8192,0.13046400249004364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,16384,0.24659200012683868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,7168,0.1151999980211258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,7168,0.08284799754619598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,10240,0.15702399611473083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,8192,0.12838399410247803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,6144,0.07689599692821503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,6144,0.09759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,5120,0.07036799937486649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,7168,0.1159679964184761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,5120,0.08633600175380707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,4096,0.06867200136184692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,4096,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,6144,0.09932799637317657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,5120,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,3584,0.051263999193906784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,3584,0.0628800019621849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,4096,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,3584,0.06534399837255478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,3072,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,3072,0.056384000927209854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,2560,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,3072,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,2048,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,2048,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,2560,0.05846399813890457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,1536,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,1536,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,2048,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,1024,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,1024,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,1536,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,1024,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,768,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,512,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,768,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,2560,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,768,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,512,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,512,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,256,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,256,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,128,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,256,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,128,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,128,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,64,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,64,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,64,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,32,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,32,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,12288,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,12288,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,16384,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,16384,0.06992000341415405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,16384,0.06806399673223495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,12288,0.05190400034189224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,10240,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,10240,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,10240,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,8192,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,8192,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,7168,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,8192,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,7168,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,65536,0.15884800255298615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,6144,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,6144,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,7168,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,6144,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,5120,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,5120,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,4096,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,5120,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,65536,0.25465598702430725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,4096,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,4096,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,3584,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,3072,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,3584,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,65536,0.25231999158859253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,3072,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,2560,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,2560,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,2048,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,1536,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,2048,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,1536,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,1024,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,1536,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,768,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,512,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,256,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,64,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,128,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,32,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,12288,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,16384,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,16384,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,16384,0.05315199866890907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,12288,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,10240,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,10240,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,10240,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,12288,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,8192,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,8192,0.03209599852561951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,8192,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,65536,0.12198399752378464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,7168,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,7168,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,6144,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,6144,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,65536,0.19014400243759155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,6144,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,5120,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,4096,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,5120,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,3584,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,65536,0.1932159960269928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,3072,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,3072,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,2560,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,2048,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,1024,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,768,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,512,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,128,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,64,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,64,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,32,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,32,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,32,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,12288,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,12288,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,16384,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,16384,0.046592000871896744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,16384,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,12288,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,10240,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,10240,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,8192,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,8192,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,7168,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,7168,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,8192,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,65536,0.10556799918413162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,6144,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,7168,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,6144,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,5120,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,65536,0.17494399845600128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,5120,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,6144,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,5120,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,4096,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,4096,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,3584,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,3584,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,3584,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,4096,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,3072,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,3072,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,65536,0.1629440039396286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,2560,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,3072,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,2560,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,2560,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,2048,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,256,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,64,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,32,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,64,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,32,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,32,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,16384,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,12288,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,16384,0.042847998440265656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,12288,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,16384,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,10240,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,12288,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,8192,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,10240,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,65536,0.08511999994516373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,8192,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,7168,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,8192,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,65536,0.15251199901103973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,7168,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,6144,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,6144,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,5120,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,6144,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,4096,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,5120,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,4096,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,5120,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,65536,0.13017599284648895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,4096,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,3584,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,3584,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,3072,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,2560,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,3072,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,3584,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,3072,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,2560,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,2560,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,2048,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,2048,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,1024,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,768,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,64,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,64,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,32,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,32,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,32,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,12288,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,12288,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,16384,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,16384,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,16384,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,12288,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,10240,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,10240,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,10240,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,8192,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,8192,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,65536,0.07657600194215775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,7168,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,7168,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,65536,0.13174399733543396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,6144,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,6144,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,5120,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,5120,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,6144,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,4096,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,5120,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,65536,0.12326399981975555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,4096,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,3584,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,3584,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,4096,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,3072,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,3072,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,3072,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,2560,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,2560,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,2048,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,2560,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,2048,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,7168,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,2048,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,1536,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,1024,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,1536,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,768,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,64,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,32,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,64,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,32,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,12288,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,12288,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,16384,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,16384,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,16384,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,12288,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,10240,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,10240,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,8192,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,10240,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,8192,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,7168,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,8192,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,7168,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,65536,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,7168,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,65536,0.0995199978351593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,6144,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,6144,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,6144,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,5120,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,5120,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,4096,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,5120,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,4096,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,3584,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,4096,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,3584,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,65536,0.0990080013871193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,3584,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,3072,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,3072,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,2560,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,2560,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,3072,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,2560,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,2048,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,2048,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,2048,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,1536,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,1536,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,1024,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,1536,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,1024,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,768,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,1024,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,768,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,768,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,256,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,512,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,64,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,32,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,12288,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,16384,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,16384,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,16384,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,12288,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,12288,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,10240,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,10240,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,8192,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,10240,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,65536,0.06745599955320358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,8192,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,7168,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,8192,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,7168,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,65536,0.08627200126647949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,7168,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,6144,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,6144,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,5120,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,5120,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,6144,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,5120,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,4096,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,4096,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,65536,0.08924800157546997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,4096,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,3584,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,3584,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,3072,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,3584,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,3072,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,3072,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,2560,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,2048,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,2560,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,2560,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,1536,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,2048,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,2048,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,1536,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,1024,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,1536,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,1024,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,768,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,768,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,512,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,768,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,512,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,256,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,512,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,12288,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,12288,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,16384,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,16384,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,16384,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,12288,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,10240,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,10240,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,10240,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,8192,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,8192,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,8192,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,65536,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,7168,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,7168,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,65536,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,6144,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,7168,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,6144,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,6144,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,5120,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,5120,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,5120,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,4096,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,4096,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,65536,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,3584,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,4096,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,3072,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,3584,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,3584,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,3072,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,2560,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,2560,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,2560,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,2048,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,2048,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,1536,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,2048,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,1536,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,1536,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,1024,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,768,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,768,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,1024,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,768,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,512,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,512,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,256,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,3072,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,128,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,12288,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,12288,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,16384,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,16384,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,16384,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,12288,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,10240,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,10240,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,10240,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,8192,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,8192,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,65536,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,8192,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,7168,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,7168,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,65536,0.06534399837255478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,6144,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,7168,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,6144,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,5120,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,6144,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,5120,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,4096,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,5120,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,65536,0.06268800050020218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,3584,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,4096,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,4096,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,3584,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,3584,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,3072,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,3072,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,3072,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,2560,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,2560,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,2560,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,2048,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,2048,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,1536,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,2048,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,1536,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,1536,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,1024,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,768,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,768,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,512,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,512,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,256,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,12288,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,16384,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,16384,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,16384,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,12288,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,10240,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,10240,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,12288,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,8192,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,10240,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,65536,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,8192,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,65536,0.05990400165319443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,7168,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,8192,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,7168,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,6144,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,6144,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,7168,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,6144,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,5120,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,5120,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,4096,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,4096,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,5120,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,65536,0.05827200040221214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,3584,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,4096,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,3584,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,3584,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,3072,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,3072,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,3072,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,2560,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,2560,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,2560,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,2048,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,1536,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,1536,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,1024,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,1024,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,1536,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,768,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,768,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,512,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,768,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,256,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,512,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,512,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,128,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,256,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,12288,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,12288,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,16384,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,16384,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,16384,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,12288,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,10240,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,10240,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,10240,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,8192,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,8192,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,7168,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,8192,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,65536,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,7168,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,65536,0.05526399984955788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,6144,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,7168,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,6144,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,6144,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,5120,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,5120,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,5120,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,4096,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,4096,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,65536,0.05427199974656105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,4096,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,3584,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,3584,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,3072,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,3584,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,3072,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,2560,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,2560,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,2560,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,2048,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,2048,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,1536,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,1536,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,1024,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,1536,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,768,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,768,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,512,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,768,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,256,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,256,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,128,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,512,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,32,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,16384,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,16384,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,12288,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,12288,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,16384,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,12288,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,10240,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,10240,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,8192,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,10240,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,8192,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,7168,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,65536,0.05135999992489815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,65536,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,8192,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,7168,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,6144,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,6144,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,5120,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,6144,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,7168,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,5120,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,5120,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,65536,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,4096,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,4096,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,3584,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,4096,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,3584,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,3584,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,3072,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,3072,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,2560,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,3072,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,2560,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,2560,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,2048,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,1536,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,2048,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,1536,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,1024,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,1536,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,1024,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,1024,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,768,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,512,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,768,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,768,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,512,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,256,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,256,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,256,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,128,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,64,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,128,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,32,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,32,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,32,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,12288,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,12288,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,16384,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,16384,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,16384,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,12288,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,10240,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,10240,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,8192,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,10240,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,8192,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,65536,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,7168,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,65536,0.04681599885225296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,7168,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,6144,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,6144,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,7168,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,5120,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,6144,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,5120,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,65536,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,4096,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,5120,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,3584,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,4096,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,4096,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,3584,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,3584,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,3072,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,3072,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,2560,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,8192,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,2560,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,3072,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,2560,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,2048,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,2048,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,2048,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,1536,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,1024,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,1536,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,1536,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,1024,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,768,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,768,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,1024,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,768,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,512,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,256,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,256,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,128,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,512,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,256,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,128,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,64,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,64,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,32,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,12288,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,16384,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,12288,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,16384,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,16384,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,12288,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,10240,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,10240,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,10240,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,8192,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,8192,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,65536,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,8192,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,65536,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,7168,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,7168,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,7168,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,6144,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,5120,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,6144,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,5120,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,6144,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,4096,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,4096,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,65536,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,5120,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,3584,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,3584,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,4096,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,3584,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,3072,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,2560,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,2560,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,2048,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,2048,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,1536,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,1536,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,1024,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,1536,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,1024,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,768,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,1024,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,768,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,768,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,512,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,256,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,512,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,256,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,256,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,128,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,64,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,32,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,32,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,16384,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,16384,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,12288,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,128,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,16384,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,12288,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,12288,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,10240,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,10240,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,65536,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,8192,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,10240,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,8192,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,65536,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,7168,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,7168,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,8192,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,7168,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,6144,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,6144,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,6144,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,5120,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,4096,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,65536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,4096,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,5120,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,5120,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,4096,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,3584,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,3584,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,2560,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,3072,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,3072,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,2560,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,3584,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,3072,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,2048,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,2560,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,2048,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,2048,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,1536,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,1536,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,1536,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,1024,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,1024,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,1024,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,768,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,768,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,512,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,512,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,768,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,256,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,256,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,128,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,128,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,128,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,256,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,64,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,64,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,32,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,32,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,12288,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,12288,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,16384,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,16384,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,16384,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,12288,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,10240,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,10240,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,10240,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,8192,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,8192,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,7168,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,7168,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,65536,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,65536,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,8192,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,6144,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,7168,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,6144,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,5120,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,6144,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,5120,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,65536,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,5120,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,4096,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,4096,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,3584,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,3584,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,3584,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,2560,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,3072,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,2560,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,3072,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,3072,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,2560,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,2048,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,2048,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,2048,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,1024,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,1536,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,1536,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,1536,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,4096,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,768,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,768,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,1024,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,512,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,768,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,256,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,512,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,256,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,128,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,256,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,512,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,128,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,32,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,64,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,32,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,32,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,12288,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,12288,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,16384,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,16384,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,16384,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,12288,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,10240,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,10240,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,10240,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,8192,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,8192,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,65536,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,8192,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,7168,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,65536,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,7168,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,6144,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,7168,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,6144,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,6144,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,5120,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,5120,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,5120,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,4096,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,65536,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,4096,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,4096,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,3584,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,3584,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,3072,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,3584,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,2560,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,3072,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,3072,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,2560,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,2560,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,2048,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,2048,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,1536,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,2048,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,1024,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,1536,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,1536,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,1024,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,768,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,1024,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,768,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,768,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,512,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,512,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,256,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,256,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,128,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,256,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,128,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,512,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,64,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,64,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,32,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,32,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,12288,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,16384,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,12288,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,16384,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,12288,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,10240,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,10240,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,8192,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,10240,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,65536,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,65536,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,8192,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,16384,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,7168,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,7168,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,8192,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,6144,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,6144,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,7168,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,65536,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,5120,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,5120,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,6144,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,4096,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,4096,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,4096,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,3584,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,3584,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,5120,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,3584,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,3072,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,3072,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,2560,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,3072,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,2560,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,2048,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,2560,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,2048,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,2048,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,1536,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,1536,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,1536,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,1024,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,1024,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,768,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,1024,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,768,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,768,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,512,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,512,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,256,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,256,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,256,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,32,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,64,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,128,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,64,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,64,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,32,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,32,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,12288,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,12288,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,16384,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,16384,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,16384,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,12288,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,10240,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,10240,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,8192,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,8192,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,10240,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,65536,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,7168,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,65536,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,8192,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,7168,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,7168,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,6144,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,6144,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,5120,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,5120,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,6144,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,5120,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,65536,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,4096,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,4096,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,3584,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,3584,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,3072,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,3584,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,4096,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,3072,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,2560,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,3072,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,2048,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,2560,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,2560,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,1536,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,2048,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,1536,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,1536,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,1024,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,1024,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,1024,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,768,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,512,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,768,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,768,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,256,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,512,0.005535999778658152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,256,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,2048,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,128,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,256,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,64,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,64,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,128,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,128,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,64,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,32,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,32,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,32,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,12288,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,12288,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,16384,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,16384,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,16384,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,12288,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,10240,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,10240,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,10240,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,8192,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,8192,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,65536,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,65536,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,7168,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,8192,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,7168,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,7168,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,6144,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,6144,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,6144,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,5120,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,5120,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,4096,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,5120,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,4096,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,65536,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,4096,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,3584,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,3584,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,3584,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,3072,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,3072,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,3072,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,2560,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,2560,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,2560,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,2048,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,2048,0.005504000000655651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,2048,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,1536,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,1536,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,1536,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,1024,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,1024,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,1024,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,768,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,512,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,768,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,768,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,512,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,256,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,512,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,256,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,256,0.005727999843657017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,128,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,128,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,64,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,128,0.005727999843657017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,64,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,64,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,32,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,32,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,32,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,12288,0.10444799810647964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,16384,0.13609600067138672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,12288,0.1879040002822876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,10240,0.15824000537395477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,12288,0.18198400735855103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,16384,0.24700799584388733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,8192,0.07385600358247757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,10240,0.09468799829483032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,16384,0.240447998046875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,8192,0.12678399682044983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,10240,0.15379199385643005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,8192,0.12451200187206268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,7168,0.06745599955320358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,7168,0.11462400108575821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,6144,0.062111999839544296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,6144,0.09731200337409973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,5120,0.057920001447200775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,6144,0.09471999853849411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,7168,0.10982400178909302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,5120,0.08511999994516373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,4096,0.06800000369548798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,4096,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,5120,0.08275199681520462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,3584,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,3584,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,4096,0.06598400324583054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,3072,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,3072,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,3584,0.058368001133203506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,2560,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,2560,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,3072,0.054336000233888626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,2048,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,2048,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,2560,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,1536,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,2048,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,1536,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,1024,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,1536,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,1024,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,768,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,1024,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,768,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,512,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,512,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,768,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,256,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,512,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,256,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,256,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,128,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,128,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,64,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,128,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,32,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,64,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,64,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,32,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,12288,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,12288,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,16384,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,16384,0.0695360004901886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,16384,0.0671359971165657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,10240,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,12288,0.05040000006556511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,10240,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,8192,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,10240,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,8192,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,7168,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,8192,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,65536,0.13760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,6144,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,7168,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,6144,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,5120,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,6144,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,5120,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,4096,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,65536,0.2534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,4096,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,3584,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,3584,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,3584,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,3072,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,3072,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,3072,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,2560,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,2560,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,65536,0.24755200743675232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,2560,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,2048,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,1536,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,256,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,256,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,64,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,128,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,64,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,64,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,32,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,32,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,32,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,12288,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,16384,0.05881600081920624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,16384,0.050464000552892685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,12288,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,10240,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,12288,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,10240,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,10240,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,8192,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,7168,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,8192,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,7168,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,65536,0.11750400066375732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,7168,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,6144,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,5120,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,65536,0.22022399306297302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,5120,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,6144,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,4096,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,3584,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,65536,0.18559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,3584,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,3072,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,2560,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,3072,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,2048,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,2048,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,2048,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,1536,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,256,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,64,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,64,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,64,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,32,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,32,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,32,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,12288,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,12288,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,16384,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,16384,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,16384,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,12288,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,10240,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,10240,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,10240,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,8192,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,8192,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,7168,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,8192,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,65536,0.11017599701881409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,7168,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,6144,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,6144,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,7168,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,65536,0.20387199521064758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,6144,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,5120,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,5120,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,4096,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,5120,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,3584,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,3584,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,65536,0.15913599729537964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,3584,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,3072,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,2560,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,2048,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,2048,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,2560,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,2048,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,4096,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,64,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,128,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,64,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,64,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,32,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,32,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,32,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,12288,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,12288,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,16384,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,16384,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,16384,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,10240,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,12288,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,10240,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,10240,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,8192,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,8192,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,7168,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,8192,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,65536,0.07286400347948074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,7168,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,6144,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,6144,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,7168,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,65536,0.13305599987506866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,6144,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,5120,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,5120,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,4096,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,5120,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,4096,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,3584,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,4096,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,3584,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,3584,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,3072,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,3072,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,65536,0.12867200374603271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,3072,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,2560,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,2560,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,2048,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,2048,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,2048,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,2560,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,1536,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,1536,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,1024,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,1024,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,768,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,1024,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,768,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,512,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,768,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,512,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,12288,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,12288,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,16384,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,16384,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,12288,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,10240,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,10240,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,10240,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,8192,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,8192,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,8192,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,7168,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,16384,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,65536,0.06758400052785873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,65536,0.11875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,7168,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,6144,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,7168,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,6144,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,5120,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,5120,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,6144,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,5120,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,4096,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,4096,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,65536,0.115167997777462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,4096,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,3584,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,3584,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,3072,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,3584,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,3072,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,3072,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,2560,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,2560,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,2560,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,2048,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,2048,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,2048,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,1536,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,1536,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,1024,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,1536,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,1024,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,768,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,1024,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,768,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,512,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,768,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,12288,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,12288,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,16384,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,16384,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,16384,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,10240,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,12288,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,10240,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,10240,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,8192,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,8192,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,7168,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,65536,0.06035200133919716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,7168,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,8192,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,6144,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,65536,0.1042879968881607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,7168,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,6144,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,5120,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,5120,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,6144,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,4096,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,5120,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,65536,0.09775999933481216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,4096,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,4096,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,3584,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,3584,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,3584,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,3072,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,2560,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,3072,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,2560,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,3072,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,2560,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,2048,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,2048,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,1536,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,1536,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,1536,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,1024,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,1024,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,768,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,768,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,512,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,768,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,512,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,2048,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,12288,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,12288,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,16384,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,16384,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,16384,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,10240,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,12288,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,10240,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,10240,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,8192,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,8192,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,65536,0.055904000997543335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,8192,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,7168,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,7168,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,65536,0.09408000111579895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,6144,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,6144,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,5120,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,7168,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,6144,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,5120,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,5120,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,4096,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,3584,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,3584,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,65536,0.08374399691820145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,4096,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,4096,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,3584,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,3072,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,2560,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,3072,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,3072,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,2560,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,2048,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,2560,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,2048,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,2048,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,1536,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,1536,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,1536,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,1024,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,768,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,768,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,512,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,512,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,16384,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,16384,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,12288,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,16384,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,12288,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,10240,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,10240,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,10240,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,8192,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,8192,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,65536,0.05238400027155876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,65536,0.08460800349712372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,12288,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,7168,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,7168,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,6144,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,7168,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,8192,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,6144,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,6144,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,5120,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,65536,0.07123199850320816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,5120,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,4096,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,4096,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,5120,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,4096,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,3584,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,3584,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,3584,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,3072,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,3072,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,2560,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,3072,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,2560,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,2560,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,2048,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,2048,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,2048,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,1536,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,1536,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,1536,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,1024,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,768,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,1024,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,768,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,512,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,256,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,12288,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,12288,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,16384,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,16384,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,16384,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,12288,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,10240,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,10240,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,10240,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,8192,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,8192,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,8192,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,7168,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,65536,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,7168,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,65536,0.07705599814653397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,6144,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,7168,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,6144,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,6144,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,5120,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,5120,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,4096,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,5120,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,4096,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,4096,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,3584,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,3584,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,3584,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,65536,0.06735999882221222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,3072,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,3072,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,3072,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,2560,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,2560,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,2048,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,2560,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,2048,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,1536,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,1536,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,1536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,1024,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,1024,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,1024,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,512,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,768,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,512,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,768,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,768,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,12288,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,512,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,16384,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,16384,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,12288,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,16384,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,12288,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,10240,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,10240,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,8192,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,10240,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,8192,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,7168,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,7168,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,65536,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,8192,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,65536,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,7168,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,6144,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,6144,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,5120,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,5120,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,6144,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,65536,0.05344000086188316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,5120,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,4096,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,4096,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,4096,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,3584,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,3584,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,3584,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,3072,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,2560,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,3072,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,3072,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,2560,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,2048,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,2560,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,2048,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,2048,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,1536,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,1536,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,1024,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,1024,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,768,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,1536,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,1024,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,768,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,512,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,512,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,256,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,12288,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,12288,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,16384,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,16384,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,16384,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,12288,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,10240,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,10240,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,10240,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,8192,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,8192,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,65536,0.04652800038456917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,7168,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,8192,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,7168,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,65536,0.061824001371860504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,7168,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,6144,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,6144,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,5120,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,65536,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,4096,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,5120,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,4096,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,4096,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,3584,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,3584,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,3072,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,2560,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,3072,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,3584,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,2560,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,3072,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,2560,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,2048,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,5120,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,1536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,1536,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,2048,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,1024,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,1536,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,1024,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,768,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,768,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,1024,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,768,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,512,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,256,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,128,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,64,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,32,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,12288,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,12288,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,16384,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,16384,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,16384,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,12288,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,10240,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,10240,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,10240,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,8192,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,8192,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,7168,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,65536,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,8192,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,7168,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,65536,0.05452800169587135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,7168,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,6144,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,6144,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,5120,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,6144,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,5120,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,4096,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,4096,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,5120,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,4096,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,65536,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,3584,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,3584,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,3584,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,3072,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,3072,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,2560,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,2560,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,2560,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,2048,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,2048,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,3072,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,1536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,1536,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,1024,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,1536,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,1024,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,1024,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,768,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,768,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,768,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,512,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,512,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,512,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,128,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,256,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,64,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,32,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,12288,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,16384,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,16384,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,12288,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,16384,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,12288,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,10240,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,10240,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,10240,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,8192,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,8192,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,65536,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,8192,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,65536,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,7168,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,7168,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,7168,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,6144,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,6144,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,5120,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,5120,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,6144,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,5120,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,65536,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,4096,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,4096,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,4096,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,3584,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,3072,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,3584,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,3072,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,3584,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,3072,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,2560,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,2560,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,2560,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,1536,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,2048,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,1536,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,2048,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,1536,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,2048,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,1024,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,768,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,1024,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,768,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,512,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,512,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,256,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,128,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,32,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,64,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,12288,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,16384,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,12288,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,16384,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,16384,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,12288,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,10240,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,10240,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,10240,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,8192,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,8192,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,65536,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,65536,0.04243199899792671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,8192,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,7168,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,7168,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,7168,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,6144,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,6144,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,5120,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,6144,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,5120,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,4096,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,65536,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,4096,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,5120,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,4096,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,3584,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,3584,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,3072,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,3072,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,3584,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,2560,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,2560,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,2048,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,2560,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,2048,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,1536,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,2048,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,1536,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,1536,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,1024,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,768,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,768,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,1024,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,768,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,3072,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,512,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,256,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,128,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,64,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,12288,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,16384,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,12288,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,16384,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,16384,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,12288,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,10240,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,10240,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,10240,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,8192,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,8192,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,65536,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,65536,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,8192,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,7168,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,7168,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,6144,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,6144,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,7168,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,6144,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,5120,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,65536,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,4096,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,5120,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,4096,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,4096,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,5120,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,3584,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,3584,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,3072,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,3584,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,3072,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,3072,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,2560,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,2560,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,2560,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,2048,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,1536,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,2048,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,1536,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,1536,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,1024,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,768,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,768,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,768,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,1024,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,512,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,512,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,256,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,64,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,128,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,32,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,12288,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,16384,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,16384,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,16384,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,12288,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,10240,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,8192,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,10240,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,10240,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,65536,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,65536,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,8192,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,12288,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,7168,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,8192,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,7168,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,6144,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,7168,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,6144,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,6144,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,65536,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,5120,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,4096,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,5120,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,5120,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,4096,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,4096,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,3584,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,3584,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,3072,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,3584,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,3072,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,2560,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,2560,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,3072,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,2560,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,2048,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,1536,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,2048,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,2048,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,1536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,1024,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,1536,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,1024,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,1024,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,768,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,768,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,512,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,512,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,256,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,64,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,32,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,12288,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,12288,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,16384,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,16384,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,16384,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,12288,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,10240,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,10240,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,10240,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,8192,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,8192,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,65536,0.06806399673223495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,7168,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,8192,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,65536,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,7168,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,6144,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,7168,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,6144,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,5120,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,5120,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,6144,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,4096,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,65536,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,4096,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,5120,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,3584,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,4096,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,3584,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,3072,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,2560,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,3072,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,3584,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,3072,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,2560,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,2560,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,2048,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,1536,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,2048,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,1536,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,1536,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,1024,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,1024,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,768,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,768,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,768,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,512,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,512,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,256,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,512,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,256,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,128,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,64,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,32,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,1024,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,12288,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,16384,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,16384,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,12288,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,16384,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,12288,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,10240,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,10240,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,8192,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,10240,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,65536,0.06809599697589874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,65536,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,8192,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,8192,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,7168,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,7168,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,6144,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,7168,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,6144,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,5120,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,65536,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,5120,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,5120,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,4096,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,4096,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,3584,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,4096,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,3072,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,3584,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,3072,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,3584,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,2560,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,2560,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,3072,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,2560,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,2048,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,2048,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,1536,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,1536,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,2048,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,1024,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,1536,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,768,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,1024,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,768,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,512,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,256,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,256,0.005760000087320805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,128,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,64,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,32,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,32,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,12288,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,12288,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,16384,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,16384,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,16384,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,12288,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,10240,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,10240,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,10240,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,8192,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,8192,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,65536,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,65536,0.06723199784755707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,7168,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,8192,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,7168,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,6144,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,6144,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,6144,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,65536,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,5120,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,5120,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,5120,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,4096,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,4096,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,4096,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,3584,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,3584,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,3584,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,3072,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,3072,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,2560,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,7168,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,2560,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,2048,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,2048,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,2560,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,2048,0.005663999821990728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,1536,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,1536,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,1536,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,1024,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,1024,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,768,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,768,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,768,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,512,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,512,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,128,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,256,0.0055680000223219395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,128,0.005727999843657017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,64,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,32,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,32,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,12288,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,12288,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,16384,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,16384,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,16384,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,12288,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,10240,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,10240,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,10240,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,8192,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,65536,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,8192,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,65536,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,8192,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,7168,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,7168,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,7168,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,6144,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,6144,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,5120,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,6144,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,65536,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,5120,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,5120,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,4096,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,4096,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,4096,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,3072,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,3584,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,3584,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,3584,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,3072,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,2560,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,2560,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,2048,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,2560,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,2048,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,2048,0.005760000087320805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,1536,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,1536,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,1536,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,1024,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,1024,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,768,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,1024,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,512,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,512,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,768,0.005727999843657017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,256,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,256,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,512,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,128,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,256,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,128,0.005696000065654516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,32,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,64,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,32,0.005439999978989363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,16384,25.569333902994792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,16384,13.600945027669272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,10240,8.584651692708333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,12288,10.325757853190105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,8192,12.686742146809895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,8192,7.401277669270833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,10240,16.67908630371094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,12288,19.708662923177084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,10240,10.031414794921876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,12288,12.033438110351563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,16384,16.94527587890625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,7168,6.101802571614583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,6144,5.6088200887044275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,7168,11.105733235677082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,6144,9.80919698079427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,5120,4.4330591837565105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,8192,8.086582438151042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,5120,7.856931050618489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,4096,3.6668960571289064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,4096,6.834418233235676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,3584,3.1907689412434896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,7168,7.153193664550781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,3584,5.810167439778646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,3072,2.8344342549641928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,6144,6.1623998006184895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,5120,5.103999837239583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,3072,4.937382507324219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,2560,2.277269236246745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,2048,1.829864501953125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,2560,4.096483103434244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,2048,3.236845906575521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,1536,1.3710763295491537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,1536,2.4738784790039063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,1024,0.9790527979532877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,1024,1.8416149139404296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,768,1.3479925791422525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,3584,3.746539815266927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,4096,4.290443929036458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,768,0.7798346837361654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,2560,2.839390818277995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,3072,3.2752395629882813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,512,0.5824511845906575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,256,0.5282325426737468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,256,0.41279252370198566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,512,0.81539306640625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,128,0.38964481353759767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,128,0.3759071985880534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,64,0.3628992080688477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,2048,2.365958404541016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,64,0.3671669324239095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,65536,32,0.3490549405415853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,65536,32,0.3564661343892416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,1536,1.820733896891276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,256,0.7586751937866211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,128,0.7270495732625325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,768,1.1346229553222655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,512,0.9184757232666015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,65536,1024,1.3392991383870443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,12288,2.8189951578776045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,16384,3.804627227783203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,12288,4.820550537109375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,16384,6.41053975423177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,10240,4.090799967447916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,10240,2.3670209248860674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,8192,1.9056223551432292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,8192,3.247206370035807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,65536,15.64560343424479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,7168,1.5639306386311849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,16384,4.141307830810547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,7168,3.306122589111328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,12288,3.1211530049641927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,6144,1.438217544555664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,10240,2.6103370666503904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,6144,2.3993044535319012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,5120,1.1410026550292969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,5120,2.1092586517333984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,4096,0.900271987915039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,4096,1.6046048482259114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,3584,1.3962261199951171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,65536,26.085829671223955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,3584,0.8257333119710287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,8192,2.0911136627197267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,3072,1.1900639851888022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,7168,1.8056607564290363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,65536,17.95789794921875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,3072,0.7164565404256185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,2560,0.5959434509277344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,2560,0.9365472157796224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,5120,1.2549610137939453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,2048,0.4872533480326335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,2048,0.731829325358073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,6144,1.4069418589274088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,1536,0.5523946762084961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,1536,0.38427626291910805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,1024,0.3821002642313639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,1024,0.26896320978800453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,768,0.294761594136556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,768,0.21642026901245118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,4096,0.9902495702107748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,512,0.22420159975687662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,3584,0.8566432317097983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,512,0.17891947428385419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,2560,0.6558997472127278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,256,0.13129813671112062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,256,0.1119989315668742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,3072,0.7538965225219727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,2048,0.5359669367472331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,128,0.09683413505554199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,128,0.10259839693705242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,64,0.08776000340779623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,64,0.10127147038777669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,16384,32,0.08755733172098795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,16384,32,0.10019199848175049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,1536,0.44287999471028644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,768,0.2995370546976725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,1024,0.33258558909098307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,256,0.20182719230651855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,512,0.24386347134908043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,16384,128,0.1928064028422038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,16384,2.821856943766276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,16384,4.647411092122396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,12288,2.0040117899576826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,12288,3.4612022399902345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,10240,1.7187924702962238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,10240,2.950936635335286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,8192,1.363124211629232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,8192,2.35787836710612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,65536,11.37843526204427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,7168,1.2626261393229166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,7168,2.036121622721354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,6144,1.7355796813964843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,12288,2.4190752665201822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,6144,1.0532213211059571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,16384,3.1642720540364584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,10240,1.98462397257487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,5120,0.8415690739949545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,5120,1.6645599365234376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,65536,19.572711181640624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,4096,1.0810399373372397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,4096,0.706773312886556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,3584,0.5996512095133464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,3584,1.0170645395914713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,3072,0.8634239832560221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,3072,0.5360629399617513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,8192,1.43209597269694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,7168,1.363381322224935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,65536,14.332980346679687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,2560,0.7027829488118489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,2560,0.46875092188517253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,2048,0.37045653661092126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,2048,0.5567616144816081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,1536,0.4005333264668782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,1536,0.29069334665934243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,5120,0.8984885533650717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,1024,0.32189121246337893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,6144,1.1500170389811197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,1024,0.2120682716369629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,768,0.21829120318094888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,768,0.16880000432332357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,4096,0.7504469553629558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,512,0.17884052594502767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,512,0.13789440790812174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,3584,0.6575466791788738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,256,0.10462933381398518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,256,0.09556266466776529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,2560,0.4858367919921875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,128,0.07398080031077067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,128,0.08554240067799887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,3072,0.5730954488118489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,64,0.06281919876734415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,2048,0.4145610809326172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,64,0.0832266648610433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,12288,32,0.06332799990971884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,12288,32,0.08177706400553385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,1536,0.3327616055806478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,768,0.22819520632425944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,512,0.18653225898742676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,1024,0.2558826605478922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,256,0.1527882734934489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,12288,128,0.14384533564249674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,16384,2.429326883951823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,12288,1.790722147623698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,12288,2.991283162434896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,16384,3.931904093424479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,10240,1.5154847462972005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,10240,2.5152320861816406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,8192,1.1945738474527994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,65536,9.897188313802083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,8192,1.9666431427001954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,7168,1.0407925287882487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,7168,1.6761044820149738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,6144,1.6130560557047526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,6144,0.9266496022542319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,16384,2.6691253662109373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,12288,2.0238282521565756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,10240,1.5831904093424478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,65536,17.04828898111979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,5120,1.171734364827474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,5120,0.7504533131917317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,4096,0.9470954895019531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,4096,0.5929248174031575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,3584,0.8362357457478842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,3584,0.5309877395629883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,65536,10.71529541015625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,7168,1.075323740641276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,8192,1.229864501953125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,3072,0.6854517618815105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,3072,0.4621429443359375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,2560,0.39931306838989256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,2560,0.5898165384928385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,2048,0.44635413487752273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,2048,0.33969812393188475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,1536,0.3634527842203776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,1536,0.26193599700927733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,5120,0.7504095713297526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,1024,0.25013972918192545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,1024,0.18318079312642416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,6144,0.9088906606038412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,768,0.22561279932657877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,768,0.18032639821370441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,4096,0.6217674891153971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,512,0.1346303939819336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,512,0.11982186635335286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,3584,0.5416714350382488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,256,0.09380373160044352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,256,0.08342080116271973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,2560,0.410808531443278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,128,0.06466986735661825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,128,0.07161920070648194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,2048,0.3460192044576009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,64,0.055720531940460206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,3072,0.4808095932006836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,64,0.07112853527069092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,10240,32,0.057468799750010166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,10240,32,0.07002666791280111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,1536,0.2809770584106445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,512,0.15655040740966797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,768,0.1910762627919515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,1024,0.2155914624532064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,256,0.128384002049764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,10240,128,0.11963093280792236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,16384,2.0011754353841145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,16384,3.1750719706217447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,12288,1.4898133595784506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,12288,2.3730817159016926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,10240,1.2904927571614584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,10240,1.9661525726318358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,8192,1.0134623845418296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,8192,1.5887114206949868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,65536,8.103587341308593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,7168,0.8599221547444662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,7168,1.38798828125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,6144,1.1845205942789714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,6144,0.7954463958740234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,65536,13.192713419596354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,5120,1.0860149383544921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,10240,1.3291093190511067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,12288,1.6146677652994792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,16384,2.195784505208333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,5120,0.631661860148112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,4096,0.4873311996459961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,4096,0.7815573374430339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,3584,0.6498698552449544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,3584,0.43398081461588545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,3072,0.5410751978556315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,3072,0.37337493896484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,65536,8.984668986002605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,7168,0.8633866628011069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,8192,1.0419455846150716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,2560,0.4594335873921712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,2560,0.30765225092569987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,2048,0.40041812260945636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,2048,0.2454592068990072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,1536,0.27309865951538087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,1536,0.2112821261088053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,5120,0.6030357360839844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,6144,0.75819517771403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,1024,0.18839786847432455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,1024,0.15844160715738934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,768,0.14919466972351075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,768,0.12593386967976888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,4096,0.5157322565714518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,512,0.11797226270039876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,512,0.10039467016855877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,3584,0.45322345097859695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,256,0.06899627049763998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,256,0.0688981294631958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,2560,0.3403168042500814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,128,0.04960213502248128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,128,0.060989868640899655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,3072,0.3993343989054362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,64,0.04538666804631551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,64,0.06137066682179769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,2048,0.2898805300394694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,8192,32,0.04630719820658366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,8192,32,0.059562667210896814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,1536,0.23396479288736977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,768,0.15795520146687825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,512,0.13024853070576986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,256,0.1061738650004069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,1024,0.1780896027882894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,8192,128,0.0980458656946818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,16384,1.7879776000976562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,16384,2.723468780517578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,12288,1.9461397806803387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,12288,1.3453547159830728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,65536,7.153217061360676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,10240,1.1235018412272135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,10240,1.7007818857828778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,8192,0.8916927973429362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,8192,1.2807103474934896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,7168,0.7816138585408529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,7168,1.1449578603108725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,65536,10.980812581380208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,6144,0.994969622294108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,6144,0.6681472142537435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,5120,0.8143818537394205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,5120,0.6198517481486003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,16384,1.8854869842529296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,12288,1.4478922526041667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,10240,1.1456746419270833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,4096,0.6375637054443359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,4096,0.5073514620463053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,3584,0.537458101908366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,3584,0.3998271942138672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,3072,0.45978879928588867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,65536,7.682283528645833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,3072,0.36313600540161134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,2560,0.38452266057332357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,2560,0.2972970644632975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,7168,0.7724917093912761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,8192,0.916989835103353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,2048,0.31619307200113933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,2048,0.2761311848958333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,1536,0.2584277311960856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,1536,0.189081605275472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,5120,0.5599072138468425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,6144,0.6678666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,1024,0.16953813234965007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,1024,0.14647679328918456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,768,0.1306943972905477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,768,0.11351253191630047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,4096,0.45733334223429367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,512,0.0935914675394694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,512,0.08503466447194417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,3584,0.4047082583109538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,256,0.0577621340751648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,2560,0.30549119313557943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,256,0.06398719946543376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,128,0.04440853198369344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,128,0.05631893475850423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,3072,0.35702292124430335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,64,0.040516265233357746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,64,0.05530666510264078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,2048,0.2636842727661133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,7168,32,0.040769068400065105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,7168,32,0.054416000843048096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,1536,0.21392532984415688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,768,0.145686403910319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,1024,0.16472427050272626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,256,0.0957354704538981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,128,0.08889919916788737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,7168,512,0.11695146560668945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,16384,2.2942581176757812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,16384,1.5847989400227864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,12288,1.1821205139160156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,12288,1.6940427144368488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,10240,1.4090880076090495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,10240,0.9981173197428385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,65536,6.4142710367838545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,8192,0.8798709233601889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,8192,1.14858767191569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,7168,0.7231360117594401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,7168,0.9737728118896485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,65536,9.515811157226562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,6144,0.8062378565470377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,6144,0.605891227722168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,5120,0.6744949340820312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,5120,0.5242090543111165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,10240,0.9591605504353842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,12288,1.186745580037435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,4096,0.5361013412475586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,4096,0.4200618743896484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,16384,1.7185471852620442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,3584,0.46148265202840166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,3584,0.3773450533548991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,3072,0.39581546783447263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,3072,0.31940266291300456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,65536,6.799902852376301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,7168,0.6797653198242187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,2560,0.35531838734944665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,8192,0.7784117380777995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,2560,0.2721813201904297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,2048,0.2725023905436198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,2048,0.22508692741394043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,1536,0.20540053049723306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,1536,0.17178346316019694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,5120,0.4822133382161458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,6144,0.5723520278930664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,1024,0.14113705952962238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,1024,0.12974826494852704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,4096,0.3948629379272461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,768,0.10943146546681722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,768,0.10121066570281982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,3584,0.3555487950642904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,512,0.08053546746571859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,2560,0.26492373148600257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,512,0.07719786961873373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,256,0.04975786606470744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,256,0.05761066675186157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,128,0.038976001739501956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,128,0.05010453462600708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,2048,0.22744533220926919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,3072,0.31065279642740884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,64,0.03491946856180827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,64,0.05035200119018555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,6144,32,0.0356383999188741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,6144,32,0.04967466592788696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,1536,0.18370240529378254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,1024,0.14208000500996906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,256,0.0815999984741211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,128,0.07574933369954427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,512,0.10118719736735027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,16384,1.9195968627929687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,6144,768,0.12343040307362874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,16384,1.3720320383707683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,65536,5.581806945800781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,12288,1.3373483022054038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,12288,1.0456565221150718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,10240,1.1940074920654298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,10240,0.8809674580891927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,8192,0.9454794565836588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,8192,0.7134506861368816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,65536,7.792794799804687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,7168,0.832813835144043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,7168,0.6183839797973633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,6144,0.6720586776733398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,6144,0.5376501083374023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,5120,0.553870964050293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,5120,0.45541973114013673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,10240,0.8765130360921225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,12288,0.9981290817260742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,16384,1.395301310221354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,4096,0.5178506533304851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,4096,0.374888547261556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,65536,5.732457478841146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,3584,0.38665065765380857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,3584,0.3186079978942871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,3072,0.332969601949056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,3072,0.27721920013427737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,2560,0.27516053517659506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,7168,0.5778602600097656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,2560,0.23418879508972168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,2048,0.22187093098958335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,8192,0.6769557317097982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,2048,0.19300907452901203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,1536,0.16923947334289552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,1536,0.15112959543863932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,5120,0.41671145757039385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,1024,0.12063146432240804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,6144,0.5005002657572428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,1024,0.1129205306371053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,768,0.09512106577555338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,768,0.09375253518422445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,4096,0.346886412302653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,2560,0.23012053171793617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,512,0.07194986343383789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,512,0.073471999168396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,256,0.0458026647567749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,256,0.05321173270543417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,3584,0.3095989227294922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,128,0.03331306576728821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,128,0.04456213315327962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,3072,0.2692970593770345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,2048,0.1974079926808675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,64,0.029654399553934736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,64,0.04446719884872437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,5120,32,0.030793599287668866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,5120,32,0.04412480195363362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,1536,0.1605237325032552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,768,0.10851946671803792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,1024,0.12446186542510987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,16384,1.478583526611328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,16384,1.1756309509277343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,256,0.0713749329249064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,128,0.06507946650187174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,5120,512,0.08698986371358236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,12288,1.091387685139974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,65536,4.7975102742513025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,12288,0.888477897644043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,10240,0.8997418721516928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,10240,0.7493247985839844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,8192,0.68766295115153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,65536,6.264198303222656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,8192,0.6076543807983399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,7168,0.6094218571980794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,7168,0.5320277214050293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,6144,0.5366986592610676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,6144,0.45499305725097655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,5120,0.4324362754821777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,5120,0.380405330657959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,10240,0.6800213495890299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,12288,0.8465087890625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,16384,1.15143674214681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,4096,0.3521973292032877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,4096,0.3266592025756836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,65536,4.865602111816406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,3584,0.3082058588663737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,3584,0.2809877395629883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,3072,0.27103999455769856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,3072,0.24316372871398925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,7168,0.4766890525817871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,2560,0.2198794682820638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,2560,0.2055232048034668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,2048,0.17797013918558757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,2048,0.16878080368041992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,8192,0.5405408223470052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,1536,0.13551360766092937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,1536,0.13039786815643312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,6144,0.4127552032470703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,1024,0.10238719781239827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,1024,0.10010026295979817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,5120,0.34542293548583985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,768,0.07579946517944336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,768,0.07998507022857666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,4096,0.28715521494547525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,2560,0.19077866872151691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,512,0.05541866620381673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,512,0.061655465761820474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,3584,0.25109759966532386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,256,0.03535360097885132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,256,0.045881601174672444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,128,0.027637332677841187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,128,0.039559467633565264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,2048,0.16284373601277669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,3072,0.22326505978902184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,64,0.024171733856201173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,4096,32,0.025171200434366863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,64,0.03951359987258911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,4096,32,0.038756267229715986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,1536,0.13125332991282146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,1024,0.10139626661936443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,256,0.057256531715393064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,128,0.05265493392944336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,16384,1.3299808502197266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,768,0.08842879931131999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,65536,4.392016092936197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,4096,512,0.07132480144500733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,16384,1.0837045033772787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,12288,0.9892629623413086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,12288,0.8125855763753256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,65536,5.536991882324219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,10240,0.7556351979573568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,10240,0.6926591873168946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,8192,0.5941418965657552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,8192,0.5616810480753581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,7168,0.5429962793986003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,7168,0.500275198618571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,6144,0.47658348083496094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,6144,0.42407894134521484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,5120,0.37581758499145507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,5120,0.35352853139241536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,10240,0.6194271723429362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,65536,4.2783447265625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,12288,0.7359093348185222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,4096,0.30862080256144203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,4096,0.30927680333455404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,3584,0.27313388188680016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,3584,0.26411840120951335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,7168,0.4348885218302409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,3072,0.22925012906392417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,3072,0.2246389389038086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,2560,0.1947999954223633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,2560,0.1905685265858968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,16384,1.004741350809733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,8192,0.4975520133972168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,5120,0.31185601552327474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,2048,0.1640714645385742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,2048,0.15894293785095215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,1536,0.1200544039408366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,1536,0.12261013189951579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,6144,0.3767765363057455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,1024,0.0843722661336263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,1024,0.08995093504587809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,768,0.06730453173319498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,768,0.07474559942881266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,4096,0.2607306639353434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,512,0.049219199021657306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,512,0.05805973211924235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,2560,0.17296427090962727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,256,0.032001066207885745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,3584,0.23297492663065592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,256,0.04275840123494466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,128,0.024752000967661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,128,0.03816106716791789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,2048,0.14798933664957684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,64,0.022791467110315957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,3072,0.20288532574971518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,64,0.036857601006825766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,1536,0.12050879796346028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3584,32,0.02444266676902771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3584,32,0.0365610678990682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,768,0.08044693470001221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,1024,0.0936842679977417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,16384,1.077800496419271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,256,0.05279573202133179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,128,0.047622398535410566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,16384,1.0104074478149414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,65536,3.9920735677083337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,65536,4.7035069783528645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3584,512,0.06478506724039713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,12288,0.8118122736612955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,12288,0.7558090845743816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,10240,0.7114154815673828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,10240,0.628497060139974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,8192,0.5142719904581706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,8192,0.5213845252990723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,7168,0.45201066335042317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,7168,0.4485184033711751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,6144,0.38716586430867517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,6144,0.3818922678629557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,5120,0.3187978744506836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,5120,0.32215894063313805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,10240,0.5494496027628581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,65536,3.802145131429037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,12288,0.6474229176839192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,4096,0.2651072025299072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,4096,0.27569812138875327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,3584,0.23304640452067055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,3584,0.2391061305999756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,16384,0.9246346791585287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,3072,0.1995189348856608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,3072,0.21693867047627768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,2560,0.17159040768941242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,2560,0.17786347071329753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,8192,0.4398346583048503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,5120,0.27612053553263344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,2048,0.13514879544576008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,2048,0.14353814125061035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,1536,0.10237653255462646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,6144,0.3327360153198242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,1536,0.11267306804656982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,1024,0.07229973475138346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,1024,0.08358399868011475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,7168,0.38278185526529945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,768,0.05853013197580973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,768,0.07013973395029703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,4096,0.23163305918375648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,512,0.04352426528930664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,2560,0.15468907356262207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,512,0.0544981320699056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,256,0.028908799091974895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,3584,0.20747733116149902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,256,0.04247573216756185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,128,0.021486934026082358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,128,0.03680213292439778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,3072,0.1811402638753255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,64,0.019769599040349327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,2048,0.13244586785634357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,64,0.03600213527679443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,3072,32,0.021254400412241616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,3072,32,0.035035733381907144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,1536,0.10747733116149902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,768,0.07107733090718588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,16384,0.8945760091145833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,1024,0.08253333568572999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,16384,0.9138624191284179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,65536,3.8335861206054687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,256,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,65536,3.6046740214029946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,128,0.04203946590423584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,12288,0.6666336059570312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,10240,0.5341279983520508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,3072,512,0.05703146855036417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,12288,0.6919551849365234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,10240,0.5714229583740235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,8192,0.4421674728393555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,8192,0.4636234601338704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,7168,0.3718784014383952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,7168,0.4192074775695801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,6144,0.32067626317342124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,6144,0.35652799606323243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,5120,0.2728362719217936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,5120,0.3035626729329427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,10240,0.47353385289510086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,16384,0.7686656316121419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,4096,0.2186720053354899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,65536,3.330335998535156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,12288,0.5759189605712891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,4096,0.25424532890319823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,3584,0.20387412707010905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,3584,0.22657279968261718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,3072,0.16985492706298827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,3072,0.19263787269592286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,2560,0.14091307322184246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,7168,0.33898134231567384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,2560,0.1636352062225342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,2048,0.11472960313161214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,2048,0.1338784058888753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,5120,0.24608960151672363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,8192,0.3859498659769694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,1536,0.08842346668243409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,6144,0.2925930658976237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,1536,0.10456853707631428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,1024,0.0627616008122762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,1024,0.0780245304107666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,768,0.05012266635894776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,768,0.06486613353093465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,2560,0.13557119369506837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,4096,0.2034261385599772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,512,0.03807786703109741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,512,0.05203946828842163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,3584,0.18010560671488446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,256,0.026020266612370807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,256,0.039086933930714926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,128,0.019538132349650066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,128,0.033163734277089435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,3072,0.15915199915568035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,64,0.016667733589808144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,64,0.03283519943555196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2560,32,0.01715839902559916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2560,32,0.03195733428001404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,1536,0.09419946670532227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,1024,0.07242560386657715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,65536,3.051097615559896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,128,0.03559146722157796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,256,0.039766399065653484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,65536,3.2205184936523437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,2048,0.11491946379343669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,16384,0.6898965199788412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,768,0.06191466649373373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2560,512,0.05003306468327841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,16384,0.8236895879109701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,12288,0.5022624015808106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,12288,0.6154954910278321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,10240,0.42554454803466796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,10240,0.5162495930989583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,8192,0.3390218734741211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,8192,0.41894187927246096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,7168,0.2995882670084635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,7168,0.3778773307800293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,6144,0.2582538604736328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,6144,0.32064425150553383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,5120,0.2142026742299398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,5120,0.271507199605306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,10240,0.4031701405843099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,4096,0.1712117354075114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,4096,0.22433066368103027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,12288,0.48391253153483077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,3584,0.15266985893249513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,16384,0.650830904642741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,3584,0.19870506922403972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,65536,2.808460744222005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,7168,0.28844054539998376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,3072,0.13157227039337158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,3072,0.18677120208740233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,2560,0.1110154628753662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,2560,0.14982293446858724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,5120,0.21067840258280435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,2048,0.0921013355255127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,2048,0.12036693096160889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,8192,0.3305781364440918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,1536,0.07019200325012206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,1536,0.09392746289571127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,1024,0.05062400102615357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,6144,0.2514261404673258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,1024,0.06954346497853597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,768,0.04057600100835164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,768,0.05781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,4096,0.17382720311482747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,2560,0.11572373708089193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,512,0.031102933486302692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,512,0.04634559949239095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,256,0.021423999468485513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,256,0.03440106709798177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,3584,0.1541418711344401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,2048,0.09757333596547445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,128,0.01578986644744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,128,0.028794666131337483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,64,0.013589333494504294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,64,0.028940800825754804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,3072,0.13526612917582195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,1536,0.07937920093536377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,2048,32,0.013730133573214212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,2048,32,0.027654399474461872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,768,0.05179946819941202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,16384,0.508241081237793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,1024,0.06110826730728149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,16384,0.7508480072021484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,65536,2.3026143391927083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,12288,0.38385705947875975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,65536,2.8607137044270834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,12288,0.5635520299275716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,10240,0.31890134811401366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,10240,0.4646613438924153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,256,0.03286400039990743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,8192,0.25348374048868816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,512,0.041801599661509196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,2048,128,0.029165865977605183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,8192,0.3749354680379232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,7168,0.21994239489237466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,7168,0.33051732381184895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,6144,0.19294293721516925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,6144,0.2865621248881022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,5120,0.16134826342264813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,5120,0.24253439903259277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,65536,2.327434539794922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,12288,0.40995092391967775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,10240,0.34169174830118815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,16384,0.5447551727294921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,4096,0.13493013381958008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,4096,0.1992853323618571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,3584,0.11664960384368897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,3584,0.1804927984873454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,3072,0.10108160177866618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,3072,0.15624853769938152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,2560,0.08566186428070069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,7168,0.242851193745931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,2560,0.13243626753489177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,2048,0.06990506649017333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,2048,0.10972906748453777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,8192,0.27594451904296874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,1536,0.05422613223393759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,1536,0.0858890692392985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,6144,0.21185173988342285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,1024,0.039345065752665206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,1024,0.062717866897583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,5120,0.17797333399454754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,768,0.032017066081364946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,768,0.051193598906199136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,2560,0.0970421314239502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,512,0.024456532796223958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,4096,0.14865387280782064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,512,0.039738667011260984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,256,0.01755946675936381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,256,0.029754666487375896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,3584,0.13103679815928143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,128,0.01304746667544047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,128,0.02558079957962036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,2048,0.08197013537089029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,3072,0.1149941364924113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,64,0.01112000048160553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,64,0.025638399521509807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1536,32,0.011569066842397054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1536,32,0.024991999069849648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,1536,0.06696000099182128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,65536,1.537612787882487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,16384,0.3798346519470215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,65536,2.5145408630371096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,16384,0.6587018966674805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,1024,0.050826664765675864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,12288,0.25536212921142576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,256,0.026305067539215087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,12288,0.4821951866149902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,768,0.0437173326810201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,128,0.02312320073445638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,10240,0.20720426241556802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,10240,0.4048437436421712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,8192,0.1719818592071533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1536,512,0.03417173226674398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,8192,0.3266698519388834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,7168,0.15141226450602213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,7168,0.28927787144978845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,6144,0.13033173084259034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,6144,0.2505919933319092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,5120,0.10868799686431885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,5120,0.21249279975891114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,10240,0.27617174784342446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,4096,0.0870794693628947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,12288,0.3318943977355957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,65536,1.7784032185872394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,4096,0.17418346405029297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,3584,0.07784000237782797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,16384,0.4395605405171712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,3584,0.1544266700744629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,3072,0.06763199965159097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,3072,0.13490986824035645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,7168,0.19870079358418782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,2560,0.05637120008468628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,2560,0.11549440224965413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,2048,0.045561599731445315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,2048,0.0944917360941569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,5120,0.14587413469950358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,1536,0.035869868596394856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,1536,0.07182400226593018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,8192,0.2249407927195231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,1024,0.025447465976079303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,1024,0.05044053395589193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,6144,0.17233279546101887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,768,0.020651733875274657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,768,0.04003200133641561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,2560,0.07820053100585937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,4096,0.12006399631500245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,512,0.015562666455904641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,512,0.03140053351720174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,256,0.011009066303571065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,256,0.024075732628504435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,3584,0.10618240038553875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,128,0.00829013337691625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,128,0.021144533157348634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,2048,0.06586133241653443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,3072,0.0922922690709432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,64,0.007295999924341838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,64,0.020820266008377074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,1536,0.053255466620127354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,1024,32,0.0075765331586201985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,1024,32,0.020100265741348267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,768,0.03338133494059245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,65536,1.1147871653238932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,1024,0.04002773364384969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,16384,0.27932586669921877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,16384,0.5973994572957356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,65536,2.3570518493652344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,12288,0.19765013058980305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,12288,0.4536725362141927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,10240,0.16445226669311525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,256,0.01987839937210083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,128,0.017356799046198527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,10240,0.3807231903076172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,8192,0.13478186925252278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,1024,512,0.02608533302942912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,7168,0.11867520014444988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,8192,0.3067498524983724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,7168,0.27157761255900065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,6144,0.09886826674143473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,6144,0.23545066515604654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,5120,0.08432106971740723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,5120,0.1998154640197754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,12288,0.29246400197347006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,10240,0.24580373764038085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,65536,1.583257548014323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,4096,0.06849706967671712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,16384,0.3889333407084147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,4096,0.16291093826293945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,3584,0.06047893365224203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,3584,0.14511893590291342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,3072,0.052407467365264894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,3072,0.1267306645711263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,2560,0.044760533173878986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,7168,0.1762741406758626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,2560,0.10808426539103191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,2048,0.03665066560109456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,2048,0.08839680353800455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,5120,0.12770346800486249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,1536,0.027896533409754436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,8192,0.20128107070922852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,1536,0.06601066589355468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,1024,0.020020266373952232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,6144,0.15262826283772785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,1024,0.04530666669209798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,768,0.016166399916013083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,768,0.03583999872207642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,2560,0.0688704013824463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,512,0.012621866663297019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,4096,0.10562132994333903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,512,0.028189865748087566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,256,0.008787199854850769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,256,0.023028266429901124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,3584,0.09299413363138834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,128,0.007136000196139018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,128,0.020121600230534872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,3072,0.08101226488749186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,64,0.006073600053787232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,1536,0.046561066309611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,64,0.01949866612752279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,768,32,0.006333866715431213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,768,32,0.019499733050664266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,768,0.028142933050791425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,2048,0.05811413526535034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,16384,0.17523627281188964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,65536,0.689569091796875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,1024,0.033979733784993485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,12288,0.12872426509857177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,16384,0.5534037272135417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,12288,0.41712427139282227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,10240,0.10780800183614095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,65536,2.164678446451823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,10240,0.34997440973917643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,8192,0.08731733163197836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,256,0.01676586667696635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,8192,0.2835295995076498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,128,0.014468266566594442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,7168,0.07686613400777181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,768,512,0.02251733342806498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,7168,0.25150720278422034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,6144,0.06818880240122477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,6144,0.21836907068888345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,5120,0.05642559925715128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,5120,0.18494292894999187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,10240,0.21427200635274252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,12288,0.2570144017537435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,4096,0.04603413343429565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,4096,0.15167039235432941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,65536,1.3084587097167968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,3584,0.04117439985275269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,3584,0.13608640034993488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,3072,0.03537386655807495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,3072,0.11841599941253662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,7168,0.15344319343566895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,2560,0.029901866118113202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,5120,0.1107477347056071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,2560,0.09982826709747314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,16384,0.3394677480061849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,2048,0.02458560069402059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,6144,0.13257386684417724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,8192,0.1744128068288167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,2048,0.08067306677500406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,1536,0.019064533710479736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,1536,0.05909333229064941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,1024,0.014212266604105631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,1024,0.03912320137023926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,768,0.011890133221944172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,768,0.03228800098101298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,4096,0.09037546316782633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,512,0.009408000111579894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,512,0.026599466800689697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,3584,0.08021972974141439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,256,0.007095466554164887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,256,0.021514666080474854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,2560,0.05955413182576498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,128,0.006045866509278615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,128,0.01942080060640971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,3072,0.07031359672546386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,64,0.005262933174769084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,64,0.01848319967587789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,512,32,0.005526400109132131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,2048,0.050315733750661215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,512,32,0.017854932943979898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,65536,0.3876949310302734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,1536,0.03974826733271281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,16384,0.09932800134023032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,16384,0.5114282608032227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,1024,0.028174932797749835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,65536,2.017367426554362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,12288,0.07428906758626302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,12288,0.38749545415242515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,10240,0.061169068018595375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,768,0.023572266101837158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,10240,0.32505601247151694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,256,0.013868799805641175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,8192,0.04946346680323283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,128,0.01172266701857249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,512,512,0.018523732821146645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,7168,0.04352746804555257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,8192,0.26307199796040853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,7168,0.23187626202901207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,6144,0.038186665376027426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,6144,0.20148053169250488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,5120,0.032048000892003374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,5120,0.17092053095499676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,65536,1.1246037801106772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,10240,0.1839146614074707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,4096,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,12288,0.22074666023254394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,4096,0.14019947052001952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,3584,0.023602133989334105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,3584,0.12405227025349934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,16384,0.2911616007486979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,3072,0.020403200387954713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,3072,0.10852693716684977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,2560,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,2560,0.09153493245442709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,7168,0.13076480229695636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,2048,0.013677866260210673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,2048,0.07258346875508627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,5120,0.09381972948710124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,1536,0.010958932836850484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,1536,0.05198506514231364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,8192,0.14867413838704427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,1024,0.008478933572769165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,6144,0.11219627062479656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,1024,0.03528106609980265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,768,0.007342933118343354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,768,0.029150933027267456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,4096,0.07669333616892496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,512,0.006164266665776571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,2560,0.050551466147104894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,512,0.025062400102615356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,256,0.005028266708056132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,256,0.019547732671101888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,3584,0.06856106917063395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,128,0.00444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,128,0.017271467049916587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,2048,0.04200640122095744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,64,0.004138666639725367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,3072,0.059478398164113364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,64,0.01707093318303426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,256,32,0.004364799956480662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,256,32,0.017115734020868936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,65536,0.2810346603393555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,1536,0.03187626600265503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,16384,0.069814403851827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,768,0.018886399269104005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,16384,0.49962558746337893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,12288,0.053356798489888516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,65536,1.9769312540690105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,1024,0.02287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,12288,0.37868054707845056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,10240,0.04583679835001628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,10240,0.3185375849405924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,256,0.010647466778755188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,8192,0.03786666790644328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,128,0.00864533285299937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,256,512,0.014738133549690247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,8192,0.2585066636403402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,7168,0.033089067538579306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,7168,0.22773332595825196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,6144,0.02883946696917216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,6144,0.19714667002360026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,5120,0.025278933842976886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,16384,0.2741087913513184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,5120,0.16688213348388672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,65536,1.0535754521687826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,4096,0.021015467246373494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,4096,0.13645013173421222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,3584,0.018755199511845906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,3584,0.12050346533457439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,12288,0.20761706034342448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,3072,0.015541332960128783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,10240,0.17423680623372395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,3072,0.10439253648122152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,2560,0.010699733098347982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,2560,0.08741866747538249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,8192,0.139847469329834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,2048,0.008248533308506011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,2048,0.06907093524932861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,7168,0.12272746562957763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,1536,0.007271466652552287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,1536,0.04886293411254883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,6144,0.10551040172576905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,1024,0.00592853327592214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,1024,0.03280106584231059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,5120,0.08838933308919271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,768,0.005224533379077911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,4096,0.07175680001576742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,768,0.028599466880162554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,512,0.004613333443800608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,512,0.0233514666557312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,3072,0.05510186751683553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,256,0.004108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,256,0.018901334206263224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,3584,0.0639082670211792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,128,0.0036618667344252265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,128,0.01778879960378011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,2560,0.047364266713460286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,2048,0.03840746482213338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,64,0.003307733436425527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,64,0.016323199868202208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,128,32,0.003669333209594091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,128,32,0.016060800353686015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,1024,0.02073919971783956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,65536,0.27740907669067383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,16384,0.07067840099334717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,12288,0.055162668228149414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,16384,0.49842240015665695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,12288,0.3782464027404785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,10240,0.04352320035298665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,8192,0.035596799850463864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,1536,0.02909119923909505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,10240,0.31762879689534507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,7168,0.031803733110427855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,65536,1.9559125264485675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,8192,0.25650026003519694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,6144,0.02807040015856425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,768,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,7168,0.2263007958730062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,512,0.013379200299580892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,4096,0.020718934138615926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,5120,0.02372693419456482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,6144,0.19595519701639813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,5120,0.16636053721110028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,4096,0.13508159319559734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,3584,0.01835839947064718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,3072,0.014640000462532044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,3584,0.11987093289693196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,256,0.009543466567993163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8192,128,128,0.007527466615041096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,2560,0.010263466835021972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,3072,0.10350613594055176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,2560,0.08709973494211833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,2048,0.00901759962240855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,2048,0.06813440322875977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,1536,0.007377066711584728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,1536,0.04712959925333659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,1024,0.005860266586144766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,1024,0.03183573285738627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,768,0.00528959979613622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,768,0.02778880000114441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,512,0.00445119986931483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,512,0.023153066635131836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,256,0.003807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,256,0.018642133474349974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,128,0.003387733300526937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,128,0.016703999042510985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,64,0.0032405334214369455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,64,0.016051200032234193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,64,32,0.003470933437347412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,64,32,0.015505066514015198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,16384,0.06809600194295248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,65536,0.25877226193745934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,12288,0.05290453433990479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,10240,0.042933332920074466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,16384,0.49781014124552414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,12288,0.3771797180175781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,8192,0.0347925345102946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,10240,0.3162634531656901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,7168,0.030958932638168336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,8192,0.256057596206665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,6144,0.027056000630060834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,7168,0.22607359886169434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,5120,0.023857067028681435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,6144,0.19547200202941895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,4096,0.019898666938145956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,5120,0.16533013979593914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,65536,1.9540468851725261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,3584,0.017443199952443443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,4096,0.13518080711364747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,3072,0.014365866780281067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,3584,0.11908480326334636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,2560,0.0090421328941981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,3072,0.10325866540273029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,2048,0.008099199831485748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,2560,0.08657066822052002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,2048,0.06760640144348144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,1536,0.006682666639486949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,1536,0.04739840030670166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,1024,0.005251200000445048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,1024,0.032067199548085526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,768,0.004744533201058706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,768,0.02736639976501465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,512,0.004170666635036469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,512,0.02345493237177531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,256,0.0036629334092140196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,256,0.01880319913228353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,128,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,32,0.01534293293952942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,128,0.01676586667696635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,64,0.00296426663796107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8192,32,64,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8192,32,32,0.0029919999341169994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,10240,4.311269124348959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,12288,5.18176015218099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,16384,6.781029256184896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,10240,7.910407511393229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,12288,9.46012471516927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,12288,6.036053466796875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,8192,3.4612757364908853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,16384,8.039295959472657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,8192,6.374240112304688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,16384,12.703021240234374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,7168,3.0647722880045576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,10240,5.099759928385416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,6144,3.1209078470865887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,7168,6.269275919596354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,8192,4.045502980550131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,5120,2.1732607523600262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,6144,4.9381591796875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,5120,4.630496215820313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,7168,3.56575673421224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,4096,1.7558197021484374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,4096,2.1746976216634115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,4096,3.2540629069010416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,6144,3.0384981791178385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,3584,1.5539658864339194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,3584,2.9047999064127605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,3072,1.317192586263021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,3072,2.440116373697917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,2560,1.0394943873087565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,5120,2.5789919535319012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,2560,1.9945674896240235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,2048,0.8281109491984049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,2048,1.5416224161783854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,1536,0.6840192159016927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,1536,1.1957258860270181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,1024,0.8543285369873047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,3584,1.7939647674560546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,1024,0.5115242640177409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,768,0.41122986475626633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,768,0.6036170959472656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,3072,1.6004042307535808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,512,0.4199690818786621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,512,0.31097599665323894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,2560,1.2206485748291016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,256,0.26646080017089846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,256,0.21213119824727378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,128,0.20009387334187828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,256,0.42029441197713213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,128,0.19271039962768555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,2048,1.1357034047444663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,64,0.181440003712972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,64,0.18905173937479655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,65536,32,0.17879573504130047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,1536,0.8369824091593424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,65536,32,0.18732159932454426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,1024,0.6503637313842774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,768,0.573412259419759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,512,0.4706282615661621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,16384,1.8606922149658203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,65536,128,0.3678389231363932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,16384,3.1933738708496096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,12288,1.3973951975504557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,12288,2.39723637898763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,10240,1.1380821228027345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,10240,2.0420426686604816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,16384,2.0270837148030596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,65536,7.472231547037761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,12288,1.5108479817708333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,8192,0.9183008193969726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,8192,1.6381951649983724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,7168,0.7674570719401042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,7168,1.3475232442220053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,6144,1.2230091094970703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,6144,0.6769674936930339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,10240,1.2767562866210938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,5120,0.5672042846679688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,5120,0.9769269307454428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,5120,0.6214474360148112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,4096,0.4656405448913574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,4096,0.7363061269124349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,65536,8.30975799560547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,8192,0.9810378392537435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,3584,0.4094101270039876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,3584,0.6481056213378906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,7168,0.8106304168701172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,3072,0.34811518987019857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,65536,13.157579549153647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,3072,0.5992832183837891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,2560,0.45999787648518875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,2560,0.29416532516479493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,2048,0.2574175993601481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,2048,0.36976426442464194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,6144,0.7031413396199544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,1536,0.27702719370524087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,1536,0.2080842653910319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,1024,0.19992960294087728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,1024,0.1415221373240153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,768,0.14827946027119954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,768,0.11494186719258626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,4096,0.4872874577840169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,512,0.10779840151468914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,3584,0.43677012125651044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,512,0.08762986660003662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,256,0.06477760076522827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,3072,0.38093865712483727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,256,0.06534506479899088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,2560,0.3162186622619629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,128,0.04965653419494629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,2048,0.27581119537353516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,128,0.09919466972351074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,128,0.05947519938151041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,64,0.045482667287190755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,64,0.06057173411051432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,1536,0.22826879819234214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,16384,32,0.04629013140996297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,16384,32,0.05979839960734049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,1024,0.1760416030883789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,768,0.15437973340352376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,512,0.12739306290944416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,16384,1.349022928873698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,16384,256,0.10513599713643391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,16384,2.259686279296875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,12288,1.0139370600382487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,12288,1.6458836873372396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,10240,1.4437450408935546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,10240,0.880948257446289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,8192,0.6788938522338868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,8192,1.134600575764974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,65536,5.764623006184896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,16384,1.530505625406901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,8192,0.7284373601277669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,7168,0.9883637110392252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,7168,0.594098154703776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,12288,1.135279973347982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,6144,0.8490751902262369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,6144,0.5469354629516602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,5120,0.46226027806599934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,5120,0.7016170501708985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,10240,0.9248586654663086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,4096,0.36018880208333337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,4096,0.5395029067993165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,65536,9.334363810221355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,3584,0.46905174255371096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,3584,0.3131061236063639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,3584,0.42308801015218095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,65536,6.503671264648437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,3072,0.2627775986989339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,3072,0.39758294423421225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,7168,0.6360703786214192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,2560,0.22377279599507652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,2560,0.34082132975260415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,2048,0.2738197326660156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,6144,0.5431861241658529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,2048,0.1861237366994222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,1536,0.21009920438130697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,1536,0.15457173983256023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,5120,0.45370880762736004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,1536,0.17346879641215007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,1024,0.14437120755513508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,1024,0.11328852971394857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,768,0.11088213125864665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,768,0.09323519865671794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,768,0.12070399920145672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,4096,0.37742506663004555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,512,0.08166399796803793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,512,0.07127359708150229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,256,0.05009066661198934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,256,0.05412906805674235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,128,0.038840532302856445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,128,0.04964480002721151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,3072,0.2880234718322754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,2560,0.24955520629882813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,64,0.035913598537445066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,64,0.05045653184254965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,2048,0.21211840311686198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,12288,32,0.03639893531799317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,12288,32,0.05042133331298828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,1024,0.13478293418884277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,512,0.09768853187561036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,256,0.0812170664469401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,12288,128,0.0747925360997518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,16384,1.8713663736979167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,16384,1.1767691294352214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,12288,0.8697184244791666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,12288,0.9566421508789062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,12288,1.4228416442871095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,10240,1.1706347147623697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,10240,0.7360266367594401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,8192,0.5914197285970052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,8192,0.9470826466878256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,65536,4.847062174479166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,7168,0.5286197344462077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,7168,0.7638186772664388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,7168,0.5342293421427409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,16384,1.2167563120524088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,6144,0.4487253189086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,6144,0.6784191767374674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,5120,0.5938101450602213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,5120,0.37763732274373374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,4096,0.3045098622639974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,4096,0.5007850646972656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,65536,8.022365824381511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,10240,0.7753760019938152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,3584,0.39520533879597985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,8192,0.6149269104003906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,3584,0.277782408396403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,3072,0.3385194778442383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,65536,5.399391174316406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,3072,0.24035733540852866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,2560,0.2872085253397624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,2560,0.2518517335255941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,2048,0.22891413370768227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,6144,0.4537194569905599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,2048,0.17310506502787273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,1536,0.17269226710001628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,5120,0.38050667444864905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,1536,0.13556159337361653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,1024,0.12246613502502442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,1024,0.10220906734466553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,768,0.09648319880167643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,4096,0.319054921468099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,768,0.08431466420491537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,3584,0.28018134435017905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,768,0.10105493068695068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,512,0.07089493274688721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,512,0.06720960140228271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,256,0.04655146598815918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,3072,0.24224747021993004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,256,0.05043306748072306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,128,0.033718399206797284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,2560,0.21163199742635092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,128,0.04433066844940185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,64,0.03012160062789917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,64,0.045094398657480876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,10240,32,0.031396265824635824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,2048,0.18089386622111003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,10240,32,0.044148266315460205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,1536,0.1478826681772868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,1024,0.11535786787668864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,16384,0.9489610671997071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,512,0.08198613325754801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,16384,1.502253850301107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,256,0.06758293310801188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,10240,128,0.06370986700057983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,12288,0.7265493392944335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,12288,1.1314165751139322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,10240,0.8994719823201498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,10240,0.6033674875895183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,65536,3.9708788553873697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,8192,0.7792490641276042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,8192,0.4911360104878743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,7168,0.6160138448079426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,7168,0.42940692901611327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,6144,0.5127658526102702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,65536,6.220935567220052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,16384,1.0451040267944336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,12288,0.74214293162028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,6144,0.3783935864766439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,5120,0.31596692403157556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,5120,0.509445317586263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,10240,0.6284224192301433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,4096,0.3520480155944824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,4096,0.2778047879536947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,65536,4.426063028971354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,3584,0.3049365361531576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,8192,0.49526398976643876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,3584,0.2411359945933024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,3072,0.2662965297698975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,3072,0.2041322708129883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,7168,0.42281068166097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,2560,0.2220991929372152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,2560,0.16533013979593914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,2560,0.17571199735005696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,2048,0.17771946589152018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,2048,0.13993172645568847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,6144,0.37199039459228517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,1536,0.13785386085510254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,5120,0.31180906295776367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,1536,0.11099200248718262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,1024,0.09585920174916586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,1024,0.0837440013885498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,768,0.07607786655426026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,4096,0.2608469327290853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,768,0.07006613413492838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,768,0.08278506596883138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,512,0.05576320091883341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,3584,0.22816960016886392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,512,0.055851733684539794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,256,0.036287999153137206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,256,0.04285759925842285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,3072,0.20231893857320152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,128,0.028191999594370527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,128,0.03870826562245687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,64,0.025204267104466754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,64,0.039026133219401044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,8192,32,0.025931733846664428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,8192,32,0.03927040100097656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,2048,0.14771733283996583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,1536,0.1213536024093628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,1024,0.09401493072509766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,512,0.06736853122711181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,16384,1.2529802958170573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,16384,0.8736352284749349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,256,0.055795200665791835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,8192,128,0.05189439853032431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,12288,0.6567935943603516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,12288,0.9581941604614258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,10240,0.7823818842569987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,10240,0.548199462890625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,65536,3.5987637837727866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,8192,0.44350401560465497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,8192,0.6551680246988932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,7168,0.5255071957906087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,7168,0.3932703971862793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,65536,5.478909810384115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,6144,0.45519574483235675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,6144,0.3473888079325358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,16384,0.9034208297729492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,12288,0.6681151707967122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,5120,0.3829397201538086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,5120,0.291106128692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,4096,0.3092351913452148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,10240,0.5511360168457031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,4096,0.23234880765279137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,65536,3.8278038024902346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,3584,0.274996280670166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,3584,0.2043999989827474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,8192,0.4425354639689128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,3584,0.21593920389811197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,3072,0.23569280306498208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,3072,0.17707840601603192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,2560,0.19115626017252604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,2560,0.15281707445780437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,2048,0.15495999654134113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,2048,0.1271722634633382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,7168,0.3816629409790039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,1536,0.12090880076090496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,1536,0.10193493366241455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,1024,0.08703359762827555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,6144,0.3365728060404459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,1024,0.0785045305887858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,1024,0.09361813068389893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,5120,0.27766507466634116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,768,0.06747413476308187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,768,0.06626346508661905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,512,0.050164266427357995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,512,0.051837865511576334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,4096,0.23184000651041664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,256,0.03244266708691915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,256,0.039661868413289385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,128,0.024628265698750814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,128,0.03746560017267863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,3072,0.18186666170756022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,2560,0.154694398244222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,64,0.02327679991722107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,64,0.0373962680498759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,2048,0.1340138594309489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,7168,32,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,7168,32,0.03676799933115642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,1536,0.11091626485188802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,768,0.07629653612772623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,16384,1.1065738677978516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,512,0.061451733112335205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,256,0.05086613496144613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,16384,0.7849866867065429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,7168,128,0.04720960060755412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,12288,0.8148309071858725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,65536,3.1917930603027345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,12288,0.5915413538614909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,65536,3.3737716674804688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,10240,0.6509749094645183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,10240,0.49653011957804366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,8192,0.5262922604878744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,8192,0.41025813420613605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,65536,4.767544555664062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,8192,0.39117228190104164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,7168,0.45293547312418625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,7168,0.36072638829549153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,6144,0.3948842684427897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,6144,0.3111882527669271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,5120,0.3228373209635417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,5120,0.27809600830078124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,16384,0.7653226852416992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,4096,0.27241493860880533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,4096,0.2120522658030192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,3584,0.2408437410990397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,3584,0.1841983954111735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,12288,0.5817087809244792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,3072,0.20106132825215658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,3072,0.18015680313110352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,10240,0.4761962572733561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,2560,0.168068265914917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,2560,0.1503925323486328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,2048,0.137773863474528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,7168,0.3306976000467936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,2048,0.1178005297978719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,1536,0.1046336015065511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,6144,0.2913087844848633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,1536,0.09369920094807943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,1536,0.09873387018839518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,1024,0.07371946970621744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,5120,0.2448362668355306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,1024,0.07117119630177816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,4096,0.20230186780293785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,768,0.05838613510131836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,768,0.05998080174128214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,512,0.04368853171666463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,512,0.047703464825948075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,256,0.02831253409385681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,3584,0.1787882645924886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,256,0.03933333158493042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,3072,0.1576533317565918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,128,0.021530665953954062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,128,0.036084266503651936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,64,0.02023786703745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,2560,0.13658347129821777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,64,0.03554666837056478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,6144,32,0.02074986696243286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,6144,32,0.035259731610616046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,2048,0.11476266384124756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,1024,0.07367573579152426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,768,0.06427199840545654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,512,0.052748799324035645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,256,0.04341333309809367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,16384,0.9109088261922201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,16384,0.6838922500610352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,6144,128,0.0405290683110555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,65536,2.7935040791829424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,12288,0.6390015920003255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,12288,0.5289994557698567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,10240,0.5461589177449544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,10240,0.4431616147359212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,65536,3.9558560689290365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,8192,0.4383701324462891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,8192,0.3664554595947266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,7168,0.3291562716166178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,7168,0.3991296132405599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,6144,0.32923625310262045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,6144,0.27893867492675783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,5120,0.2747136116027832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,16384,0.6658837636311848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,5120,0.23854079246520996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,5120,0.22185494105021158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,65536,2.8863604227701822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,12288,0.48378346761067703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,4096,0.22224319775899254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,4096,0.19315733909606933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,3584,0.19623573621114093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,10240,0.40194346110026047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,3584,0.16485546429951986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,3072,0.17009706497192384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,3072,0.14371733665466307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,8192,0.3322378794352213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,2560,0.14167680740356445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,2560,0.12397332986195882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,7168,0.29018774032592776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,2560,0.11812799771626789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,2048,0.11469013690948486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,2048,0.10454933643341065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,1536,0.08950080076853434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,1536,0.08621866703033447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,1024,0.06346346537272135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,6144,0.25243306159973145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,1024,0.06548266808191935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,768,0.050774399439493814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,768,0.05544106562932333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,4096,0.17338026364644368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,512,0.03839786847432454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,3584,0.1564352035522461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,512,0.045848532517751055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,256,0.026345600684483845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,3072,0.13730986913045246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,256,0.03626240094502767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,128,0.019885865847269694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,128,0.031850665807724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,2048,0.10082666873931885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,64,0.016583466529846193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,1536,0.08455359935760498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,64,0.03167999982833862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,5120,32,0.01729493339856466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,5120,32,0.03173013329505921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,1024,0.06504106521606445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,768,0.0570090651512146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,16384,0.7001866658528646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,512,0.045797332127888994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,16384,0.5844821294148763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,256,0.03779946565628052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,12288,0.5076757431030273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,5120,128,0.034789331754048664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,12288,0.4437354723612468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,65536,2.373666127522786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,65536,3.0545514424641924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,10240,0.4351178805033366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,10240,0.37183361053466796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,8192,0.3435765266418457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,8192,0.33419094085693357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,7168,0.29440746307373045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,7168,0.2764394760131836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,6144,0.25374612808227537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,6144,0.23695146242777504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,5120,0.21802560488382974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,5120,0.1988874594370524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,16384,0.5423957188924153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,12288,0.4062815984090169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,65536,2.4032948811848955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,4096,0.19011732737223308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,4096,0.1613994598388672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,3584,0.1560383955637614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,10240,0.3365290641784668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,3584,0.14221760431925456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,8192,0.27463359832763673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,3072,0.13225173155466716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,3072,0.12327466805775959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,2560,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,7168,0.2396735986073812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,2560,0.10631999969482422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,2048,0.09060479799906412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,6144,0.20631146430969238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,2048,0.08984639644622802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,1536,0.06985066731770834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,1536,0.0735530694325765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,1024,0.05031466484069824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,5120,0.17740480105082196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,1024,0.05661973158518473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,768,0.04037653207778931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,768,0.04772693316141764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,4096,0.1464010715484619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,512,0.030946133534113567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,3584,0.12963093121846517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,512,0.04002666473388672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,3072,0.1128981351852417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,256,0.021669334173202513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,256,0.031883732477823896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,2560,0.098526930809021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,128,0.016078933080037435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,128,0.02855679988861084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,2048,0.08430293401082357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,64,0.013946666320164999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,1536,0.0679253339767456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,4096,32,0.015188266833623251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,64,0.02775040070215861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,4096,32,0.02723413308461507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,1024,0.052750933170318606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,768,0.046037332216898603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,16384,0.6466570536295573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,512,0.0381877342859904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,16384,0.5665546417236328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,256,0.031027199824651082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,12288,0.4860341389973958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,65536,2.243951924641927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,12288,0.4323082605997722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,4096,128,0.028244266907374065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,65536,2.8863039652506512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,12288,0.3654549280802409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,10240,0.40075200398763017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,10240,0.3675562540690104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,8192,0.31808319091796877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,8192,0.3078346570332845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,7168,0.2836223920186361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,7168,0.2619392077128092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,6144,0.2393514633178711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,6144,0.23048639297485352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,5120,0.2042784055074056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,5120,0.19505386352539061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,16384,0.49760001500447587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,65536,2.1158997853597006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,4096,0.166539732615153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,4096,0.1876757303873698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,3584,0.143012269337972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,10240,0.30585813522338867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,3584,0.13912426630655925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,8192,0.24861013094584145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,3072,0.1231551965077718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,7168,0.218559996287028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,3072,0.12183146476745606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,2560,0.1053440014521281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,2560,0.10537919998168946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,6144,0.1902762730916341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,2048,0.08594240347544352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,2048,0.08902400334676107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,1536,0.06698453426361084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,2048,0.07721066474914551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,1536,0.07274346351623535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,1024,0.048495999972025555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,1024,0.055452799797058104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,5120,0.15969707171122233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,768,0.03887253204981486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,768,0.04307733376820882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,512,0.024234666426976522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,4096,0.13350613911946613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,512,0.0353877345720927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,256,0.016961065928141277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,256,0.030666667222976684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,256,0.02858026623725891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,3584,0.11758506298065186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,128,0.014439466595649719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,128,0.026104533672332765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,3072,0.1034773349761963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,64,0.013111467162768045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,2560,0.08833920160929362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3584,32,0.013927466670672097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,64,0.026793599128723145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3584,32,0.02656960090001424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,1536,0.06341439882914225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,1024,0.049388798077901204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,768,0.04284053246180217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,16384,0.5102464040120442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,16384,0.5004128138224284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,512,0.034686934947967527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,12288,0.3865344047546387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,12288,0.3800650596618652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,65536,1.9666720072428387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,65536,2.3281150817871095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,10240,0.3231114705403646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,10240,0.3203850746154785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3584,128,0.026012800137201947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,8192,0.2690218607584635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,8192,0.26515520413716637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,7168,0.22774079640706382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,7168,0.23939199447631837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,6144,0.19627092679341634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,6144,0.20504320462544762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,5120,0.16559680302937824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,5120,0.17100906372070312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,16384,0.42749439875284834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,65536,1.8278911590576172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,4096,0.13185813426971435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,12288,0.3249141375223795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,4096,0.1521557331085205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,3584,0.11810026963551838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,10240,0.26879253387451174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,3584,0.12675519784291583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,3072,0.09971840381622314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,8192,0.22051199277242026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,3072,0.10923733711242675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,2560,0.0846005360285441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,7168,0.19598506291707357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,2560,0.09508480230967203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,2048,0.06988373597462973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,6144,0.16987306276957195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,2048,0.08064640363057454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,2048,0.06856853167215983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,1536,0.054396800200144445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,1536,0.06498346726099649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,1024,0.03934933344523112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,1024,0.049133865038553874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,5120,0.1432970682779948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,768,0.031941332419713336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,768,0.041442131996154784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,768,0.0385045329729716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,512,0.024495999018351235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,512,0.035130667686462405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,256,0.01763199965159098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,4096,0.11779839992523193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,256,0.028708267211914062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,128,0.013198933005332947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,3584,0.10604266325632732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,128,0.025197867552439374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,3072,0.09137279987335205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,64,0.011183999975522359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,64,0.02483946681022644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,3072,32,0.011522133151690166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,2560,0.07900479634602865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,3072,32,0.024754132827123007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,1536,0.056287999947865805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,1024,0.04337600072224935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,16384,0.4235264142354329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,512,0.030426667133967085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,65536,1.8707455952962238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,65536,1.785971196492513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,16384,0.45959679285685223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,256,0.02490239938100179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,12288,0.35431572596232097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,12288,0.3480469385782877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,3072,128,0.022629332542419434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,10240,0.2629013379414876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,10240,0.29869651794433594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,10240,0.2390122731526693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,8192,0.21442987124125162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,8192,0.23760533332824707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,7168,0.18723626136779786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,7168,0.21028052965799965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,6144,0.16217066446940104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,6144,0.18249066670735675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,5120,0.13644693692525228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,5120,0.1548181374867757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,5120,0.12479573090871174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,4096,0.10727252960205078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,4096,0.1262463967005412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,3584,0.09716479778289795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,16384,0.3802015940348307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,65536,1.6344842274983722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,3584,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,12288,0.2877589225769043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,3072,0.08848959604899088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,3072,0.09790506362915039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,2560,0.07500480016072592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,2560,0.0839242696762085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,8192,0.19487786293029785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,2048,0.057886934280395506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,7168,0.17112107276916505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,2048,0.0708021322886149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,1536,0.04421226580937703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,1536,0.056227199236551915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,6144,0.14992106755574544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,1024,0.03222613334655762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,1024,0.042073599497477215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,768,0.025595732529958087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,768,0.03559039831161499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,4096,0.10298559665679932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,512,0.019399466117223103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,512,0.029706666866938274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,3584,0.09224106470743815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,256,0.013127467036247254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,3072,0.08102719783782959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,256,0.02491413354873657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,128,0.010021332899729412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,2560,0.07001067002614339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,128,0.022190932432810465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,2048,0.05995626846949259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,64,0.008643200000127155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,64,0.021939200162887574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2560,32,0.009034666419029235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,1536,0.049610666433970135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2560,32,0.021740800142288207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,1024,0.03790719906489055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,16384,0.331440003712972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,768,0.03251306613286336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,16384,0.4105632146199544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,16384,0.32447360356648763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,512,0.02624853253364563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,65536,1.4495445251464845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,12288,0.24995840390523277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,65536,1.590571721394857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,12288,0.31182934443155924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,10240,0.21672213872273766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,256,0.02195626695950826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,10240,0.2692330678304037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,8192,0.16905280749003093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,8192,0.22001919746398926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2560,128,0.01967573364575704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,7168,0.14797439575195312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,7168,0.1891306718190511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,7168,0.14902933438618976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,6144,0.12827093601226808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,6144,0.16583360036214193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,5120,0.10852373441060384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,5120,0.13996586799621583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,5120,0.10819199879964192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,4096,0.08806186517079671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,4096,0.1144437313079834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,3584,0.07752853234608968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,3584,0.10168213049570721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,3072,0.06650559902191162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,3072,0.08783573309580485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,65536,1.3886751810709634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,12288,0.24288533528645834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,2560,0.06502399841944376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,2560,0.07483733495076497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,10240,0.20625173250834145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,2048,0.04541973272959392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,2048,0.06288533210754395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,8192,0.1683989365895589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,1536,0.03559146722157796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,1536,0.048971732457478837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,6144,0.12750399907430013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,1024,0.02555946707725525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,1024,0.03677759965260823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,4096,0.08912639617919922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,768,0.020869332551956176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,768,0.03172373374303182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,3072,0.06924906571706137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,512,0.015559466679890952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,2560,0.060591999689737955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,512,0.027137066920598345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,512,0.02225173314412435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,256,0.01093226671218872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,256,0.022348799308141074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,2048,0.05180373191833496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,128,0.008390399813652038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,128,0.02050986687342326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,64,0.0072512000799179075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,64,0.020103466510772706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,2048,32,0.007594666878382365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,1536,0.041790934403737386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,2048,32,0.02013333241144816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,3584,0.07853546937306723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,1024,0.03141013383865356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,16384,0.2622783978780111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,65536,1.0460021336873373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,768,0.02752853234608968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,16384,0.3712160110473633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,12288,0.2191487948099772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,65536,1.4506122589111328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,12288,0.2833717346191406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,10240,0.18242133458455403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,10240,0.23924372990926107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,8192,0.13304853439331055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,8192,0.19521172841389972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,256,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,7168,0.1172650655110677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,2048,128,0.016416000326474507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,7168,0.17223787307739258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,6144,0.09920533498128256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,6144,0.15028479894002278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,5120,0.08241066932678223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,5120,0.1272010644276937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,65536,1.083777109781901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,4096,0.06842880249023438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,12288,0.2069002628326416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,4096,0.10554666519165039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,10240,0.17258772850036622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,3584,0.06021973292032877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,3584,0.09192426999409994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,8192,0.14326720237731932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,3072,0.05167680184046427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,3072,0.07960533301035563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,2560,0.04404693444569906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,7168,0.12573119799296062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,2560,0.067630934715271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,6144,0.10869332949320476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,2048,0.03588373263676961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,2048,0.05456426541010538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,1536,0.02758293350537618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,5120,0.0908245325088501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,1536,0.04220586617787679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,1024,0.020191999276479085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,16384,0.27426026662190756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,1024,0.03261013428370158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,768,0.01613759994506836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,768,0.028808534145355225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,4096,0.07505706946055093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,768,0.02299840052922567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,512,0.012417067090670269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,512,0.024361600478490196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,3584,0.06737173398335775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,256,0.008753066261609394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,256,0.020940800507863365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,3072,0.05884693463643392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,128,0.0070698668559392285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,128,0.019454934199651084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,2560,0.051077334086100254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,64,0.006117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,64,0.019321600596110024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1536,32,0.006379733482996623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,2048,0.0436245322227478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1536,32,0.01890773375829061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,1536,0.03470613161722819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,65536,0.6569909413655599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,16384,0.1647978623708089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,16384,0.32861226399739585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,65536,1.2536735534667969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,12288,0.12632426420847576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,12288,0.24842346509297691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,512,0.0186901330947876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,10240,0.1074677308400472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,256,0.015101866920789084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,10240,0.20972053209940592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,8192,0.08514346281687418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,128,0.013883733749389648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,8192,0.17115839322408039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,7168,0.07568746407826742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,7168,0.15244266192118328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1536,1024,0.026549333333969118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,6144,0.06577920118967692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,65536,0.8626986821492514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,16384,0.22094507217407228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,6144,0.13243306477864583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,5120,0.053973333040873206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,5120,0.11222933133443196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,12288,0.16948053042093914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,4096,0.04522026777267456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,4096,0.09114879767100016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,4096,0.06079039971033732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,3584,0.039562666416168214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,10240,0.14146560033162434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,3584,0.07892800172170003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,3584,0.05480320056279501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,3072,0.03424959977467855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,8192,0.11521706581115723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,3072,0.0675818681716919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,2560,0.029203200340270997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,2560,0.05662399927775065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,2048,0.024178133408228556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,2048,0.04543040196100871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,7168,0.10224213600158691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,1536,0.019102933009465535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,1536,0.03616213401158651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,1024,0.014006400108337402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,1024,0.02922240098317464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,6144,0.08900372982025147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,768,0.011699199676513672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,5120,0.07397440274556479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,768,0.02579733331998189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,512,0.009272533655166625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,512,0.022373332579930624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,256,0.0069909334182739254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,256,0.018739199638366698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,3072,0.04808853467305501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,128,0.005910400052865346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,128,0.017926400899887084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,2048,0.0346560001373291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,64,0.005085866649945577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,1536,0.027863466739654542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,64,0.017782400051752724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,1024,32,0.005387733379999796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,1024,0.021116799116134642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,1024,32,0.017384533087412515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,768,0.018345600366592406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,65536,0.5158303896586101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,512,0.014939733346303306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,16384,0.1310869296391805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,12288,0.0993664026260376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,16384,0.3183594703674316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,65536,1.1767637888590494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,256,0.012009599804878235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,12288,0.23263360659281412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,10240,0.08388266563415528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,2560,0.04150720040003459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,10240,0.19780267079671224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,8192,0.06752320130666098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,8192,0.16179413795471193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,7168,0.059388800462087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,1024,128,0.011096533139546711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,7168,0.14368640581766765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,6144,0.05079786777496338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,6144,0.12478933334350586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,5120,0.04351786772410075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,5120,0.1053546667098999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,12288,0.15875627199808756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,4096,0.03459626833597819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,4096,0.08480746746063232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,16384,0.20971306165059408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,3584,0.030477867523829145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,65536,0.845303471883138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,3584,0.07379626433054606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,3072,0.026809600989023845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,6144,0.08221440315246582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,3072,0.06252906719843546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,10240,0.13345492680867513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,2560,0.02305813431739807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,2560,0.051327999432881674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,2048,0.018778665860493978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,7168,0.09617493152618409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,2048,0.04175999959309896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,1536,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,1536,0.034279465675354004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,8192,0.10982933044433593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,1024,0.011346133550008138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,1024,0.027126399676005046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,3072,0.04434773524602254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,768,0.009593600034713745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,4096,0.05720533529917399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,768,0.025410133600234985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,512,0.007794133325417836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,512,0.021805866559346517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,1536,0.024539732933044435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,256,0.005919999877611796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,2560,0.038149333000183104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,3584,0.051164801915486655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,256,0.018402133385340372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,128,0.004990933338801066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,128,0.017499732971191406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,1024,0.019233065843582153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,2048,0.031267199913660684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,64,0.004469333092371622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,64,0.016746666034062704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,5120,0.07042346795399984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,768,32,0.004738133152325948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,768,32,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,65536,0.35650774637858074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,16384,0.09354773362477621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,65536,1.1040735880533854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,16384,0.28588266372680665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,12288,0.0706816037495931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,12288,0.21685439745585122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,10240,0.0582698663075765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,10240,0.18382827440897626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,10240,0.10923199653625489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,256,0.010309333602587383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,8192,0.0469215989112854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,512,0.01344106694062551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,128,0.009134933352470398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,8192,0.15022826194763184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,7168,0.041169067223866776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,768,768,0.015784533818562825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,7168,0.07797653675079345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,7168,0.13338027000427247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,65536,0.661296017964681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,6144,0.03576639890670776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,6144,0.11593493620554607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,5120,0.03069546620051066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,5120,0.09907626310984294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,5120,0.05720959901809693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,4096,0.024549333254496257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,4096,0.07891093095143636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,16384,0.17260586420694987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,3584,0.021364265680313112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,3584,0.06812907059987386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,3072,0.01863893270492554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,3072,0.05757866700490316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,3072,0.03652906815210978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,2560,0.01612053314844767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,2560,0.046888534228007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,2048,0.013404800494511922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,2048,0.03777173360188802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,1536,0.01095360020796458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,1536,0.03236053387324016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,8192,0.08882559935251871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,1024,0.008550399541854858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,6144,0.06785386403401693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,1024,0.025604265928268432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,768,0.007274666428565979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,768,0.023194666703542074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,4096,0.047508267561594646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,512,0.006209066510200501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,3584,0.04225706656773885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,512,0.020281600952148437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,12288,0.13052480220794677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,256,0.005017599960168203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,256,0.01801919937133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,2560,0.030877866347630817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,128,0.004444799820582072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,2048,0.025849600632985432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,128,0.01684053341547648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,64,0.004074666649103165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,64,0.016302933295567833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,512,32,0.004293333490689596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,1536,0.02068693240483602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,512,32,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,65536,0.19633599917093914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,16384,0.054722134272257486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,1024,0.016122666994730632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,16384,0.27083520889282225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,65536,1.0218016306559243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,12288,0.04262826840082805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,768,0.01378666659196218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,12288,0.204803196589152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,10240,0.038065067927042645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,512,0.011268267035484314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,10240,0.17343146006266277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,8192,0.034932267665863034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,256,0.008922666311264038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,8192,0.1438528060913086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,7168,0.02810773253440857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,7168,0.12740373611450195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,6144,0.02453440030415853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,6144,0.10983466307322184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,512,128,0.00830080012480418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,5120,0.021559466918309532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,5120,0.09292159875233968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,16384,0.15179626146952313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,65536,0.5883722941080729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,4096,0.018627200524012247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,4096,0.07259840170542399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,3584,0.016099199652671814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,12288,0.11471680005391438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,3584,0.06273173491160075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,3072,0.013363200426101684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,10240,0.09545706907908122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,3072,0.05179839928944906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,8192,0.07804586887359619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,2560,0.012129066387812297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,7168,0.06919573148091634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,2560,0.04209280014038086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,2048,0.009686400492986042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,6144,0.059879465897878015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,2048,0.03489280144373576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,1536,0.008436266581217449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,1536,0.02911253372828166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,1024,0.006311466793219249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,1024,0.024297600984573363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,4096,0.04199893474578857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,768,0.005654400090376536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,5120,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,768,0.022291199366251627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,512,0.00491839994986852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,512,0.01999573310216268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,3072,0.03158506751060486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,256,0.003980800012747447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,256,0.017198934157689413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,3584,0.03727039893468221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,128,0.0036490666369597114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,128,0.015991466244061787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,2048,0.022660267353057862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,64,0.003373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,64,0.015396266182263692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,256,32,0.003735466549793879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,2560,0.027369600534439088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,256,32,0.015777066349983215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,65536,0.15874667167663575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,65536,0.9951871871948242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,1536,0.018387200435002644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,16384,0.04242986838022868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,16384,0.26188799540201824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,1024,0.013929599523544311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,12288,0.03295466701189677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,12288,0.19719252586364747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,512,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,768,0.011909332871437073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,10240,0.02922240098317464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,10240,0.1671456019083659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,8192,0.02368320027987162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,256,0.0077248002092043565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,8192,0.13659946123758954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,7168,0.019242666165033975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,7168,0.12082133293151856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,6144,0.01684160033861796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,256,128,0.00699946681658427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,6144,0.10543039639790852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,5120,0.011758933464686077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,5120,0.08840320110321045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,65536,0.5660640080769856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,4096,0.010073600212732951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,4096,0.06885440349578857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,16384,0.14786559740702312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,3584,0.00941546658674876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,3584,0.05916266838709513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,12288,0.11096426645914714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,3072,0.00808426688114802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,10240,0.09413973490397134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,3072,0.04839680194854736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,2560,0.007355733215808869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,8192,0.07526400089263915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,2560,0.039928531646728514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,2048,0.00655680000782013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,7168,0.06690986951192221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,2048,0.03388373454411824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,1536,0.005743999779224396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,6144,0.05784106651941935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,1536,0.028676267464955645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,1024,0.004936533172925314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,5120,0.04938666820526123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,1024,0.02352959911028544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,768,0.004542933404445648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,768,0.021237333615620933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,4096,0.040343467394510904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,512,0.0038751999537150065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,512,0.018761599063873292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,3072,0.02993599971135457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,256,0.0034730667869249977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,3584,0.03564266761144002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,256,0.0166485329469045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,128,0.0031968000034491217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,128,0.015436800320943198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,2048,0.021150932709376017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,2560,0.025911466280619307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,64,0.003048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,1024,0.013063466548919678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,128,32,0.0034613333642482757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,64,0.015547733505566916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,128,32,0.015369600057601929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,65536,0.1505557378133138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,1536,0.017191465695699057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,16384,0.04024746815363566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,12288,0.031676799058914185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,16384,0.25903679529825846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,10240,0.02827626665433248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,12288,0.19638187090555828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,8192,0.023785599072774253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,65536,0.9827285130818686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,10240,0.16632426579793294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,8192,0.13572160402933758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,7168,0.018346667289733887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,7168,0.12014826933542888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,6144,0.01560640037059784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,5120,0.01071679989496867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,6144,0.10403733253479004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,5120,0.08794879913330078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,4096,0.008565333485603333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,3584,0.007981866598129272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,4096,0.06794346968332926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,3072,0.007314133147398631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,3584,0.05826773246129354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,512,0.009026133020718892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,2560,0.007162666817506154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,3072,0.04811306794484456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,2560,0.04013653198877971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,2048,0.0062837332487106325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,2048,0.03276693423589071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,1536,0.005407999952634176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,1536,0.028037333488464357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,1024,0.004571733375390371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,1024,0.023891200621922813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,768,0.021057067314783733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,768,0.004163199911514918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,512,0.01944213310877482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,256,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,512,0.003719466676314672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,128,0.006274133423964183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,128,0.016219733158747356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,256,0.0033919999996821085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,256,0.016307199994723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,32,0.0030080000559488933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,128,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,64,64,0.002962133288383484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,64,0.015246933698654175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4096,128,768,0.011016533772150675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,64,32,0.014822399616241455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,65536,0.13496960004170735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,16384,0.03773653507232666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,12288,0.02927359938621521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,8192,0.022127999862035116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,16384,0.25719146728515624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,10240,0.025570134321848553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,12288,0.196835199991862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,10240,0.16601066589355468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,65536,0.9846165339152018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,7168,0.019346133867899577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,8192,0.1358751932779948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,7168,0.12053439617156983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,6144,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,5120,0.010853333274523418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,6144,0.10506986776987712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,5120,0.08736106554667154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,4096,0.010066133737564088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,4096,0.06745279630025228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,3584,0.009000533819198608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,3584,0.05657920042673746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,3072,0.008193066716194153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,3072,0.047030401229858396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,2560,0.0061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,2560,0.038194131851196286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,2048,0.005447466671466827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,2048,0.03208640019098918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,1536,0.004823466638724009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,1536,0.027266132831573486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,1024,0.004158933212359746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,1024,0.02318293253580729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,768,0.003993600110212962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,768,0.020939733584721884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,512,0.0035487999518712364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,512,0.0185973326365153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,256,0.003236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,256,0.01626240015029907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,128,0.002889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,128,0.015596800049146018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,64,0.0028586665789286296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,64,0.015224533279736838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4096,32,32,0.002976000060637792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4096,32,32,0.014889599879582724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,10240,2.155016581217448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,12288,2.5717343648274738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,16384,3.4604565938313803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,16384,3.9978240966796874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,10240,3.9389984130859377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,12288,4.725754801432291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,8192,1.749303436279297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,16384,6.4142110188802075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,8192,3.2075146993001304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,7168,1.4083754221598306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,12288,2.9684171040852863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,7168,2.7642496744791667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,6144,1.2550762176513672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,6144,1.511523183186849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,6144,2.428088633219401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,5120,1.0063648223876953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,5120,2.0192330678304033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,10240,2.4940330505371096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,8192,2.0219083150227863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,4096,0.7873301188151042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,4096,1.5063765207926432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,3584,0.6960789362589519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,3584,1.40927365620931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,7168,1.7678624471028646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,3072,0.6272213617960612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,3072,1.1850079854329427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,2560,0.5167786598205566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,2560,0.9948991775512696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,2048,0.772871462504069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,2048,0.43410132726033523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,4096,0.9936384201049805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,1536,0.5633216222127279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,1536,0.3839040120442708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,3584,0.8273877461751302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,1024,0.37260481516520183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,1024,0.24562026659647623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,3072,0.7127690633138021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,768,0.29204587936401366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,768,0.20222934087117514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,512,0.21507412592569985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,512,0.16086293856302897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,2560,0.6070314407348633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,256,0.13341760635375977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,512,0.24429866472880044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,256,0.11573226451873779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,256,0.20077759424845376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,128,0.10357226530710857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,5120,1.2365898132324218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,128,0.10416320164998372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,64,0.09526933034261068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,2048,0.5248767852783203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,65536,32,0.09063999652862549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,32,0.10642240047454835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,65536,64,0.10775360266367595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,1536,0.4212618509928386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,1024,0.32088747024536135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,16384,0.8658133188883463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,768,0.2916362762451172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,16384,1.4881610870361328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,12288,0.6375647862752278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,65536,128,0.1867242654164632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,12288,1.1554720560709635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,10240,0.5332607905069987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,10240,0.9081898371378581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,65536,3.6822176615397133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,8192,0.4334560076395671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,8192,0.694045893351237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,7168,0.3819242795308431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,7168,0.6358912150065105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,16384,0.9170901616414389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,6144,0.535102907816569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,6144,0.3360032081604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,65536,6.420385233561198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,6144,0.36119254430135095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,5120,0.42666880289713544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,5120,0.332207997639974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,10240,0.5939957300821941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,4096,0.3501152038574219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,4096,0.23012266159057618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,65536,4.112188720703125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,3584,0.30749759674072263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,3584,0.20212906201680503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,8192,0.46335573196411134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,3072,0.2613973299662272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,3072,0.17533013025919597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,2560,0.22294507026672364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,2560,0.15155733426411946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,2560,0.16644694010416666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,2048,0.18197760581970215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,12288,0.7028095881144206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,7168,0.4082506815592448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,2048,0.12736000219980875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,1536,0.14658133188883465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,1536,0.1042080005009969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,1024,0.09949653148651123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,1024,0.07805973688761393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,768,0.07632959683736165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,5120,0.29581867853800453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,768,0.06550079981486003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,512,0.05584746599197388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,4096,0.2467871983846029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,512,0.052475734551747644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,256,0.0358517328898112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,3584,0.21741760571797691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,256,0.0413482666015625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,3072,0.19010880788167317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,128,0.028198399146397907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,128,0.038379732767740884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,64,0.025265065828959148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,64,0.039204267660776775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,2048,0.13989334106445311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,16384,32,0.02651626666386922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,1536,0.1172928015391032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,16384,32,0.03903146584828694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,768,0.08031786282857259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,512,0.06618560155232747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,256,0.0556330680847168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,16384,1.07894287109375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,16384,0.6606538772583008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,128,0.051992531617482504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,65536,2.8016011555989584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,12288,0.5113471984863281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,16384,1024,0.09027733008066813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,12288,0.8102485020955404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,12288,0.5308053334554036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,10240,0.41677548090616867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,10240,0.6654442469278972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,10240,0.449234135945638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,65536,4.692891947428385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,8192,0.3433034578959147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,8192,0.5385845184326172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,7168,0.4770762761433919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,7168,0.3079456011454264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,6144,0.3838752110799154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,6144,0.26576107343037925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,5120,0.3462474822998047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,5120,0.21989653905232748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,16384,0.7054847717285156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,4096,0.2639882723490397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,4096,0.18901972770690917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,65536,3.2200490315755212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,3584,0.22999040285746256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,3584,0.17968533833821615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,3584,0.17498772939046223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,3072,0.1999285380045573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,3072,0.14441919326782227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,3072,0.15458240509033203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,8192,0.3596672058105469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,2560,0.16786346435546876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,2560,0.12288426558176677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,2048,0.13689813613891602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,2048,0.1024074633916219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,2048,0.10895360310872396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,1536,0.10547093550364177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,1536,0.08306453227996827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,6144,0.26681493123372396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,1024,0.0741055965423584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,1024,0.06451840003331502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,768,0.05861119826634725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,5120,0.22364907264709472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,768,0.05461440086364746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,512,0.043211734294891356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,512,0.04539200067520142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,256,0.028708267211914062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,4096,0.18778133392333984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,256,0.03744106690088908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,128,0.021719467639923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,128,0.03539093335469563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,7168,0.3137738545735677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,64,0.020540799697240195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,2560,0.126364803314209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,64,0.035037867228190106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,12288,32,0.02086826761563619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,1536,0.0890666643778483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,12288,32,0.03519466718037923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,1024,0.06970240275065104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,768,0.061673601468404136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,512,0.050819198290506996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,16384,0.9099946975708008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,16384,0.5769269307454427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,256,0.04283946752548218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,16384,0.5866432189941406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,12288,0.43679466247558596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,12288,0.6860085169474284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,10240,0.5396479924519857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,65536,2.418377685546875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,10240,0.3700298627217611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,8192,0.4347423871358235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,8192,0.3016725222269694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,8192,0.3113792101542155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,7168,0.3800842603047689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,65536,3.955945587158203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,7168,0.2605973402659098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,6144,0.3300575892130534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,6144,0.25564799308776853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,5120,0.27525440851847327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,5120,0.2003647963205973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,4096,0.22136106491088867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,12288,128,0.04047893285751343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,12288,0.4491615931193034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,4096,0.1584448019663493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,4096,0.15983680089314778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,3584,0.20186986923217773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,3584,0.14246613184611004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,10240,0.36931838989257815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,3584,0.14158186912536622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,3072,0.1755370616912842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,3072,0.12651413281758625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,2560,0.1434442679087321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,2560,0.11027519702911377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,2048,0.11698880195617675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,2048,0.09312000274658203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,7168,0.2654634634653727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,1536,0.08947733243306479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,1536,0.07525866826375326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,6144,0.228875732421875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,1024,0.06356053352355957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,1024,0.058538667360941564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,768,0.05075413386027018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,5120,0.19009386698404948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,768,0.05085866848627726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,65536,2.6650591532389325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,512,0.0439850648244222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,512,0.047220265865325926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,3072,0.1247711976369222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,512,0.05050346851348877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,256,0.026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,2560,0.106605863571167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,256,0.036001066366831466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,128,0.01999893387158712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,128,0.03140693306922913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,64,0.016796799500783284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,1536,0.07602667013804118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,10240,32,0.017771732807159425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,64,0.03204480012257894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,1024,0.059577600161234534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,10240,32,0.03188800017038981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,768,0.05235306819279989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,16384,0.4705375989278157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,16384,0.6861408233642579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,2048,0.09247679710388183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,12288,0.5212287902832031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,256,0.03683946530024211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,65536,1.9401835123697917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,12288,0.3624704043070475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,10240,128,0.03471253315607707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,12288,0.3785813331604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,10240,0.4251680056254069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,10240,0.31327788035074866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,65536,3.0992810567220053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,8192,0.3406325340270996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,8192,0.25833066304524743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,7168,0.3053002675374349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,7168,0.21477866172790527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,7168,0.22927786509195963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,6144,0.2629759947458903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,6144,0.19256319999694824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,5120,0.22011946042378744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,5120,0.16535785992940266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,4096,0.17775039672851561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,4096,0.13590720494588215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,3584,0.15704107284545898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,16384,0.477677853902181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,3584,0.11967039903004964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,3072,0.13599467277526855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,65536,2.147607421875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,3072,0.10589546362559002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,2560,0.1144757350285848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,8192,0.24516266187032065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,2560,0.09136213461558024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,2048,0.09313813050587973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,6144,0.18549760182698566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,2048,0.07826879819234213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,5120,0.15876053174336752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,1536,0.07172693411509196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,1536,0.06457813183466593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,1024,0.05073173443476359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,1024,0.050418134530385336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,3584,0.1154047966003418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,10240,0.30403305689493815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,768,0.04099306662877401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,768,0.046648534138997395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,512,0.03246399958928426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,3072,0.10320853392283122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,512,0.03753173351287842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,512,0.03644693295160929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,256,0.02143893241882324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,256,0.03025813301404317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,2560,0.08783146540323893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,128,0.016616533199946083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,128,0.027265065908432008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,64,0.013710932930310568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,2048,0.07648853460947672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,64,0.028483200073242187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,4096,0.13186346689860026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,8192,32,0.015217066804567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,8192,32,0.028918399413426714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,1536,0.06253439982732137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,1024,0.049219199021657306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,16384,0.6029909133911133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,768,0.04307626485824585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,16384,0.4309343973795573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,12288,0.44388160705566404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,12288,0.32554985682169596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,65536,1.7890815734863281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,256,0.03088853359222412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,10240,0.37851734161376954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,8192,128,0.028112000226974486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,10240,0.26998933156331384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,10240,0.2939381281534831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,65536,2.7352884928385417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,8192,0.30380051930745444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,8192,0.2202218691507975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,7168,0.26106239954630533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,7168,0.2021333376566569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,7168,0.20156052907307945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,6144,0.22690240542093912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,6144,0.16911039352416993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,5120,0.1907061258951823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,5120,0.1408960024515788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,4096,0.15333546002705892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,4096,0.11735573609670003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,16384,0.43541866938273116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,3584,0.14608853658040363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,65536,1.8968149820963542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,12288,0.31898454030354817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,3584,0.1070186694463094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,3072,0.11220693588256836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,3072,0.09381866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,2560,0.09657279650370279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,8192,0.22065280278523763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,2560,0.08094293276468913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,2048,0.07846720218658447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,2048,0.06799466609954834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,6144,0.16887893676757812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,1536,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,5120,0.14096105893452962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,1536,0.05526613394419352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,1024,0.041819731394449874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,1024,0.04261546532313029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,768,0.032765867312749226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,4096,0.11816106637318928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,768,0.03641173442204793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,512,0.02364586591720581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,3584,0.1057802677154541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,3072,0.09201707045237223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,512,0.031931734085083006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,256,0.015871999661127727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,512,0.03344853321711223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,256,0.027401600281397504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,128,0.013397333025932313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,2560,0.07928213278452555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,128,0.02537706693013509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,64,0.01269653340180715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,64,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,2048,0.0693120002746582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,7168,32,0.013845333456993103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,7168,32,0.026296534140904743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,1536,0.05787093242009481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,16384,0.5058623949686687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,1024,0.045132799943288164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,768,0.039534934361775714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,16384,0.3964757283528646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,65536,1.5594965616861978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,65536,1.588266626993815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,256,0.02776319980621338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,12288,0.38862826029459635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,12288,0.3047231992085775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,65536,2.299723815917969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,12288,0.30030721028645835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,10240,0.24657492637634276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,10240,0.34774293899536135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,7168,128,0.025516800085703534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,8192,0.2565375963846842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,8192,0.21739093462626138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,7168,0.2227306683858236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,7168,0.1831775983174642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,6144,0.19454933802286783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,6144,0.15805974006652831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,5120,0.16435839335123698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,5120,0.13274986743927003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,4096,0.13308266798655194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,4096,0.11025173664093017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,3584,0.11652906735738118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,16384,0.3762666702270508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,3584,0.09915306568145751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,3072,0.1010698636372884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,3072,0.08823146820068359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,10240,0.23823572794596354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,2560,0.08721066315968831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,8192,0.19249812761942547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,2560,0.07805973688761393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,7168,0.16738346417744954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,2048,0.07083733081817627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,2048,0.06564693450927735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,6144,0.14349865913391113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,1536,0.053965866565704346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,1536,0.05409173170725504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,5120,0.1251541296641032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,1024,0.0390997330347697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,1024,0.0430890679359436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,768,0.03192746639251709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,4096,0.10376853148142498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,768,0.037611734867095944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,512,0.02476693391799927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,3584,0.09161919752756754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,512,0.03245760003725688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,256,0.017347200711568197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,256,0.027037866910298664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,3072,0.08063999811808267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,128,0.013590400417645773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,2560,0.06956160068511963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,128,0.024820266167322795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,128,0.02201813260714213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,2048,0.06047573486963907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,64,0.011296000083287556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,64,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,6144,32,0.01146346628665924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,6144,32,0.024871466557184856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,1536,0.04964906771977742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,1024,0.038523733615875244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,768,0.03411413431167602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,16384,0.41960961023966475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,512,0.02837546666463216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,16384,0.34303468068440757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,65536,1.360806401570638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,65536,1.8873930613199872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,12288,0.3233183860778809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,12288,0.26251200040181477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,6144,256,0.023659733931223552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,10240,0.2827818552652995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,10240,0.2254197279612223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,8192,0.21819626490275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,8192,0.1908682664235433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,8192,0.167303466796875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,7168,0.19192852973937988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,7168,0.16148799260457355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,7168,0.1462176005045573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,6144,0.16531839370727539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,6144,0.13847039540608724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,5120,0.1387477397918701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,5120,0.11614399751027424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,4096,0.11131947040557862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,4096,0.09688106377919516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,16384,0.3185183842976888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,3584,0.09838186899820964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,65536,1.317068862915039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,3584,0.08749120235443116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,3072,0.08795200188954672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,3072,0.07803520361582438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,12288,0.24488959312438965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,3072,0.07356586456298828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,2560,0.07022720177968343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,2560,0.06840533415476481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,10240,0.20244053204854331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,2048,0.0573578675587972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,2048,0.05814719994862875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,1536,0.04509866635004679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,1536,0.046869333585103354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,1536,0.043901864687601724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,1024,0.03231679995854696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,1024,0.03778986533482869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,6144,0.1273578643798828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,768,0.02587733268737793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,768,0.03237760066986084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,5120,0.10728320280710857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,512,0.019553067286809285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,512,0.028913066784540815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,256,0.013425067067146301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,4096,0.08864533106486003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,256,0.02328959902127584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,128,0.010002133250236512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,3584,0.07970986366271973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,128,0.021986132860183714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,64,0.00858133335908254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,64,0.021719467639923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,2560,0.061210668087005614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,5120,32,0.008906666437784832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,2048,0.05263253450393677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,5120,32,0.021590399742126464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,1024,0.03427626689275105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,16384,0.34060586293538414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,65536,1.1586804707845053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,512,0.024845866362253825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,65536,1.3880480448404948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,16384,0.2971989313761393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,256,0.02097599903742472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,16384,0.27797441482543944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,12288,0.2553973356882731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,12288,0.2016160011291504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,10240,0.21110080083211263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,12288,0.26379626592000327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,10240,0.19754133224487305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,128,0.019144533077875774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,8192,0.17183465957641603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,8192,0.15304640134175618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,7168,0.15106132825215657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,7168,0.1350111961364746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,6144,0.12844906648000082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,6144,0.11759146849314372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,5120,0.10788373152414958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,5120,768,0.030397866169611615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,5120,0.10150826772054036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,4096,0.08619413375854493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,4096,0.08487466971079508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,3584,0.07759040196736654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,65536,1.1070016225179038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,3584,0.0773845354715983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,3072,0.066975998878479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,3072,0.06792000134785971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,3072,0.05875733296076456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,10240,0.1707039992014567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,2560,0.056542932987213135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,2560,0.05864959955215454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,8192,0.1390954653422038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,2048,0.046397864818573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,2048,0.049347201983133956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,7168,0.1218997319539388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,1536,0.03605653444925944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,1536,0.04025493462880452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,6144,0.10596373081207275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,1024,0.025634133815765382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,5120,0.08953066666920981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,1024,0.03272320032119751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,768,0.02089386582374573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,768,0.02885119915008545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,3584,0.06668799718221029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,512,0.016528000434239708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,512,0.02431360085805257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,2560,0.05073813199996948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,256,0.010983467102050781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,2048,0.043986133734385174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,256,0.021922133366266885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,1536,0.03606186707814534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,128,0.008407466610272725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,128,0.019951999187469482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,128,0.015947733322779337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,4096,0.07411306699117025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,64,0.0071839998165766404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,64,0.019924267133076986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,4096,32,0.00787306676308314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,1024,0.02835306723912557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,4096,32,0.020020266373952232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,768,0.024889600276947022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,16384,0.3238837242126465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,16384,0.2938826560974121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,65536,1.1132981618245443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,65536,1.2867637634277345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,512,0.020544000466664634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,12288,0.24271039962768554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,12288,0.22714452743530272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,10240,0.20027732849121094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,10240,0.1886613368988037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,8192,0.16138346989949542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,8192,0.15147306124369303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,4096,256,0.017571200927098594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,7168,0.14336640040079754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,7168,0.13323413530985515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,6144,0.1241493304570516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,6144,0.11537919839223225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,5120,0.10440320173899334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,5120,0.10000320275624593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,12288,0.206222931543986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,4096,0.08353493213653565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,4096,0.08334399859110514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,16384,0.27590080897013347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,3584,0.07469653288523356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,65536,1.1004554748535156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,3584,0.07465493679046631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,3072,0.06450453201929728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,6144,0.10601173241933186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,3072,0.06600213448206584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,2560,0.054471464951833096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,2560,0.05678613185882568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,7168,0.12422826290130615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,2048,0.044963200887044266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,2048,0.048110934098561604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,8192,0.13915947278340657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,1536,0.03477973143259684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,1536,0.039631998538970946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,5120,0.08998506863911947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,1024,0.02513493299484253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,1024,0.032051199674606325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,10240,0.17256320317586263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,768,0.02062506675720215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,768,0.02837013403574626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,3072,0.05698346694310506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,512,0.016009599963823954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,512,0.024226133028666177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,4096,0.07359039783477783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,256,0.010815999905268351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,256,0.02114773392677307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,2560,0.04951999982198079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,3584,0.06498346726099649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,128,0.008198399841785432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,1536,0.03244373401006063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,128,0.019849600394566853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,64,0.006968533496061961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3584,32,0.0074890668193499255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,64,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3584,32,0.019688532749811808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,65536,1.0771957397460938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,2048,0.04068906704584758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,65536,0.9980202356974284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,16384,0.2639509359995524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,16384,0.2617311954498291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,1024,0.025650133689244587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,12288,0.19665600458780924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,12288,0.1974954605102539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,12288,0.16449492772420246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,10240,0.16732373237609863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,10240,0.16564265886942547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,8192,0.13235627015431722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,256,0.014017066359519959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,8192,0.1336949348449707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,7168,0.11623146533966064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,512,0.017094399531682333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,7168,0.11850666999816895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,128,0.011934933066368104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,6144,0.10044586658477783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,6144,0.10298559665679932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,16384,0.21715839703877768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,65536,0.8396362940470377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,5120,0.08500160376230875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,5120,0.0906442642211914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,4096,0.06800426642100016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,10240,0.13811200459798176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,4096,0.07457386652628581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,3584,0.059655467669169106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,3584,0.06628906726837158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,8192,0.11222933133443196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,3072,0.051620264848073326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,3072,0.05752533276875814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,7168,0.09930986563364665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,2560,0.043961600462595625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,6144,0.086627197265625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,2560,0.04980053504308064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,2048,0.03585066795349121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,2048,0.04235413471857707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,5120,0.07271680037180582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,1536,0.027958399057388304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,1536,0.03535573482513428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,4096,0.06081600189208984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,1024,0.020437333981196085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,1024,0.028858667612075804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,768,0.01673813263575236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,768,0.025600000222524004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,3584,0.05498773256937663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,512,0.012552533547083536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3584,768,0.02068480054537455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,512,0.022792534033457438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,3072,0.048069334030151366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,256,0.008874666690826417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,256,0.02036799987157186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,256,0.01461120049158732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,128,0.007041066884994507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,2560,0.041405868530273435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,128,0.018935465812683107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,64,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,64,0.01917866667111715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,3072,32,0.006357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,2048,0.03588373263676961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,3072,32,0.01906026601791382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,1536,0.02997973362604777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,1024,0.023319466908772787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,16384,0.20788480440775553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,65536,0.8420458475748698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,65536,0.891327985127767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,16384,0.23597332636515297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,768,0.020382932821909585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,12288,0.16085333824157716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,12288,0.18006720542907714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,10240,0.13381439844767254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,512,0.016964266697565712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,10240,0.15420586268107098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,8192,0.10840106805165609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,8192,0.12312533060709636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,7168,0.09481173356374105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,7168,0.10852693716684977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,3072,128,0.01321386694908142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,6144,0.07999786535898844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,6144,0.09417386849721274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,5120,0.06812480290730795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,5120,0.08046933015187582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,12288,0.1667199929555257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,4096,0.05569279988606771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,4096,0.06712640126546224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,16384,0.21868054072062174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,65536,0.8827871958414713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,6144,0.0843733310699463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,3584,0.049092264970143636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,3584,0.06121919949849447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,10240,0.14098240534464518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,3072,0.04275306860605876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,3072,0.053401601314544675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,2560,0.03640106519063314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,2560,0.04614506562550862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,2048,0.030373332897822063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,7168,0.09932160377502441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,2048,0.03943146864573161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,1536,0.023883734146753946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,1536,0.03285013238588969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,8192,0.11458133061726887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,1024,0.017619200547536216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,1024,0.027053866783777875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,5120,0.07322026888529459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,768,0.014075733224550881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,768,0.024756266673405965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,3072,0.045431466897328694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,512,0.011054933071136475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,512,0.021676800648371377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,4096,0.05989546775817871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,256,0.008038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,1536,0.026467200120290118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,256,0.019514666001001994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,128,0.006664533416430156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,128,0.018346667289733887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,2560,0.040668801466623945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,64,0.006004266440868378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,64,0.0183242658774058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,3584,0.05293013254801432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2560,32,0.006107733150323232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2560,32,0.018224000930786133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,2048,0.033131732543309526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,1024,0.020986666282018028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,65536,0.6473226547241211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,768,0.016744534174601235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,65536,0.8010026931762695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,16384,0.16649279594421387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,16384,0.21138025919596354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,12288,0.1244821310043335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,12288,0.16172800064086915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,12288,0.12801067034403485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,10240,0.10523520310719807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,10240,0.13866346677144367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,8192,0.08374826908111573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,8192,0.11215893427530925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,7168,0.07435093720753988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,7168,0.09863999684651693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,128,0.009877333045005798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,256,0.011598933736483257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,6144,0.06495466629664103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,6144,0.08536960283915201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,65536,0.649173355102539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2560,512,0.013894400000572205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,5120,0.05427519877751669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,16384,0.16491947174072266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,5120,0.07584640185038248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,4096,0.04416106541951497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,4096,0.05937066475550333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,3584,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,3584,0.052345601717631016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,10240,0.10504106680552165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,3072,0.03422613143920898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,3072,0.04570879936218262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,2560,0.029045333464940388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,8192,0.08556479612986247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,2560,0.04025706847508748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,2048,0.024078933397928874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,2048,0.034858667850494386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,7168,0.07497279644012451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,1536,0.019740800062815346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,6144,0.06538879871368408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,1536,0.02977493405342102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,5120,0.055947732925415036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,1024,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,1024,0.0253983994325002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,4096,0.04702506860097249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,768,0.011851732929547627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,768,0.022510933876037597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,3584,0.04142186641693115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,512,0.009383466839790345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,3072,0.03631893396377563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,512,0.020317866404851278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,256,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,256,0.018764799833297728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,128,0.005736533304055532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,2560,0.03132373293240865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,128,0.017310933272043864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,64,0.0051584000388781226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,2048,0.02719786763191223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,64,0.017378133535385133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,2048,32,0.00527999997138977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,2048,32,0.01726400057474772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,65536,0.5129237174987793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,1536,0.022165334224700926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,65536,0.7229525248209636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,16384,0.13069547017415364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,1024,0.017553067207336424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,16384,0.19210453033447267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,768,0.015523200233777365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,12288,0.09768853187561036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,12288,0.14803733825683593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,512,0.013152000308036805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,10240,0.08192746639251709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,10240,0.1251530647277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,256,0.011452800035476685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,8192,0.06610026756922403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,8192,0.10163093407948812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,7168,0.05816533168156942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,2048,128,0.010150399804115296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,7168,0.08953493436177572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,6144,0.04978773196538289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,6144,0.07729600270589193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,5120,0.04257386525472005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,5120,0.06521600087483724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,12288,0.11618026892344158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,4096,0.0347264011700948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,4096,0.0524405320485433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,16384,0.15325013796488446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,65536,0.5889653523763021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,3584,0.030689066648483275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,6144,0.05926719903945923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,3584,0.04733546574910481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,10240,0.09663039843241374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,3072,0.026920533180236815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,3072,0.04151573181152344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,2560,0.023176532983779908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,2560,0.036339199542999266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,7168,0.06910293102264405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,2048,0.019352533419926963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,2048,0.03162986636161804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,8192,0.08051520188649496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,1536,0.015288533767064414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,1536,0.026925865809122724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,1024,0.011541333794593812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,1024,0.023486934105555215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,3072,0.0327349325021108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,768,0.009444266557693481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,768,0.021362133820851645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,4096,0.04244053363800049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,512,0.007768533130486806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,512,0.019563732544581096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,1536,0.019349332650502524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,2560,0.028576000531514482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,256,0.005901866654555003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,3584,0.03820159832636515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,256,0.018168532848358156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,128,0.004935466746489207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,128,0.01705066760381063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,64,0.004527999957402547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,64,0.016899200280507405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1536,32,0.004699733356634776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1536,32,0.017030400037765504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,5120,0.05268266598383585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,2048,0.024127999941507973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,65536,0.3355242729187012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,16384,0.08720959822336832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,65536,0.644708251953125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,16384,0.17039999961853028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,12288,0.06593279838562012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,12288,0.13164906501770018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,1024,0.015442132949829102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,10240,0.05621866782506307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,10240,0.11158186594645184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,512,0.011443199714024861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,256,0.009648000200589497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,65536,0.42859732309977217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,128,0.008347733815511068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1536,768,0.013031466801961263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,8192,0.045976531505584714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,8192,0.09050133228302001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,8192,0.058918400605519616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,16384,0.11427733103434246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,7168,0.040378665924072264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,7168,0.0791040023167928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,6144,0.03511039813359578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,6144,0.0674336036046346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,12288,0.08616533279418945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,5120,0.029423999786376952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,4096,0.023695999383926393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,5120,0.0562549352645874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,4096,0.045099735260009766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,3584,0.02103253404299418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,3584,0.040210131804148355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,3072,0.018488534291585288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,3072,0.035916801293691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,2560,0.016012799739837647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,2560,0.03200213313102722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,10240,0.07211306889851889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,2560,0.021678932507832847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,2048,0.013710932930310568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,2048,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,1536,0.010805333654085796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,1536,0.025163733959198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,7168,0.05233706633249918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,1024,0.008498133222262064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,6144,0.04575253327687581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,1024,0.02177813251813253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,5120,0.03889066775639852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,768,0.007353599866231282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,768,0.020118399461110433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,4096,0.03186986645062764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,512,0.006197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,3584,0.028595199187596638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,512,0.018244266510009766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,256,0.004975999891757965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,256,0.016886399189631144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,3072,0.025319466988245647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,128,0.004315733412901561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,128,0.016214399536450704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,64,0.0038751999537150065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,64,0.0160863995552063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,1024,32,0.004084266722202301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,2048,0.018926932414372762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,1024,32,0.016122666994730632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,65536,0.26455039978027345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,1536,0.015657599767049155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,16384,0.07075839837392171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,65536,0.6051071802775065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,1024,0.012618666887283326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,16384,0.15912000338236493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,12288,0.05230933427810669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,768,0.01123306651910146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,12288,0.12301867008209229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,12288,0.08119573593139648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,512,0.009642666578292847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,10240,0.04460586706797282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,10240,0.10428373018900554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,256,0.008391466736793519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,8192,0.0363264004389445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,8192,0.08514880339304606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,1024,128,0.0074432000517845156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,7168,0.031914667288462324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,7168,0.0740991989771525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,7168,0.049437868595123294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,6144,0.02769706646601359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,6144,0.06267946561177572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,5120,0.02280319929122925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,5120,0.051836800575256345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,4096,0.018686934312184652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,4096,0.04160000085830688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,65536,0.4095264116923015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,3584,0.01693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,16384,0.10687999725341797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,3584,0.038211198647816975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,3072,0.015217066804567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,10240,0.06832746664683023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,3072,0.03417173226674398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,2560,0.013083733121554055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,8192,0.055978667736053464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,2560,0.02991466720898946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,2048,0.010847999652226766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,2048,0.0268885334332784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,6144,0.04290239810943604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,1536,0.008949333429336548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,5120,0.03693973223368327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,1536,0.02363306681315104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,1024,0.007045333087444305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,1024,0.02092906634012858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,4096,0.02969920039176941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,768,0.006161066889762879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,768,0.019509333372116088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,512,0.005415466427803039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,512,0.018088533480962118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,3584,0.026615466674168902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,256,0.004380799829959869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,3072,0.02297280033429464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,256,0.016663466890652977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,128,0.003918933371702829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,2560,0.02059946656227112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,128,0.016032000382741295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,64,0.003585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,64,0.015638400117556253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,2048,0.01760639945665995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,768,32,0.00373333344856898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,768,32,0.01575573285420736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,1536,0.01439466675122579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,65536,0.18101119995117188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,1024,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,16384,0.05065706570943197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,768,0.009898666540781658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,65536,0.5626815795898438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,16384,0.1539562702178955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,12288,0.0399946649869283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,12288,0.11807359854380291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,10240,0.03187626600265503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,512,0.008712533116340637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,10240,0.10096853574117023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,8192,0.029806933800379437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,8192,0.08106453418731689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,256,0.007538133362929027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,7168,0.026690133412679035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,7168,0.0688746690750122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,768,128,0.006481066842873891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,6144,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,6144,0.05723839998245239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,5120,0.018687999248504637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,5120,0.04699946641921997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,65536,0.3593855857849121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,16384,0.09349439938863119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,4096,0.016330666343371072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,4096,0.03835306564966838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,12288,0.07056319713592529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,3584,0.01409066617488861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,3584,0.03558506568272908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,3072,0.01178666651248932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,10240,0.0596565326054891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,3072,0.03197120030721028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,8192,0.04943466583887736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,2560,0.010706133643786113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,2560,0.028359466791152955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,2048,0.00851093331972758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,7168,0.04364800055821737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,2048,0.02523840069770813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,1536,0.007260799904664357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,6144,0.037590400377909346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,1536,0.022359466552734374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,1024,0.005832533538341522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,5120,0.03214933276176453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,1024,0.019795199235280357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,768,0.005354666709899902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,768,0.01843520005544027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,4096,0.025814400116602583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,512,0.004675200084845225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,512,0.018181333939234413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,3072,0.020734934012095134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,256,0.004002133260170618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,256,0.016049066185951234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,3584,0.02374826669692993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,2048,0.016025599837303162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,128,0.003719466676314672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,1024,0.010493866602579753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,768,0.009262933333714803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,128,0.015571199854214988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,64,0.0034229333202044168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,2560,0.018506666024525963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,64,0.015610667069753012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,512,32,0.0034101332227389016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,512,32,0.015474133690198264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,65536,0.10406293074289959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,16384,0.03049280047416687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,65536,0.5155658721923828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,16384,0.14684906005859374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,12288,0.027368533611297607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,12288,0.11039466857910156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,1536,0.013450666268666586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,10240,0.023000532388687135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,10240,0.0930303970972697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,512,0.00826453318198522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,8192,0.019751467307408652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,8192,0.07362133661905924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,7168,0.015708800156911215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,256,0.007399466633796692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,7168,0.06334613164265951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,6144,0.01360640029112498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,6144,0.052520533402760826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,512,128,0.006410666803518932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,5120,0.011815466483434041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,5120,0.042820266882578534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,65536,0.3321642557779948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,4096,0.010072533289591472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,4096,0.03543573220570882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,16384,0.08686933517456055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,3584,0.008937600255012512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,3584,0.031140265862147014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,12288,0.06637546618779501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,3072,0.008444799979527792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,3072,0.029287467400232952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,10240,0.05625600020090739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,2560,0.007231999933719635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,2560,0.02675519982973735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,8192,0.04571733474731445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,2048,0.006347733239332835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,2048,0.023970133066177367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,7168,0.040598400433858234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,1536,0.0057205334305763245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,6144,0.034686934947967527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,1536,0.021840000152587892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,1024,0.00477866679430008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,1024,0.019348265727361043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,5120,0.029244800408681233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,768,0.004330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,768,0.01812160015106201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,4096,0.024105600516001382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,512,0.003974399964014689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,512,0.017262933651606242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,3072,0.019384533166885376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,256,0.003479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,3584,0.02193386753400167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,256,0.016059733430544534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,128,0.0032597333192825317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,128,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,2560,0.01714986761411031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,64,0.003017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,64,0.015264000495274863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,2048,0.014419200023015341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,256,32,0.0032543999453385672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,256,32,0.01520746648311615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,65536,0.07788693110148112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,1024,0.009540266791979472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,16384,0.02443839907646179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,768,0.008393599589665731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,65536,0.5037898699442546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,1536,0.012195199728012085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,16384,0.1384661356608073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,12288,0.019797333081563315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,10240,0.014658133188883463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,12288,0.10743467013041179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,10240,0.09050880273183187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,8192,0.012327466408411663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,512,0.007272533575693766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,8192,0.0705952008565267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,7168,0.01106666624546051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,7168,0.06211413145065307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,256,0.0065194666385650635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,6144,0.010390399893124899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,256,128,0.005717333157857259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,6144,0.05066773494084677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,5120,0.009321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,5120,0.04121919870376587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,65536,0.3231840133666992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,16384,0.08453439871470134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,4096,0.010378666718800863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,12288,0.06521493196487427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,4096,0.034430932998657224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,3584,0.00958293378353119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,3584,0.03163520097732544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,10240,0.054758401711781826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,3072,0.008301866551240284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,3072,0.029026132822036744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,8192,0.045075198014577225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,2560,0.007177599767843883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,2560,0.026590933402379353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,7168,0.03948053518931071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,2048,0.006405333181222279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,2048,0.02408426602681478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,6144,0.03380906581878662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,1536,0.005453866720199585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,1536,0.02151573300361633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,5120,0.028200532992680865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,1024,0.004640000065167745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,1024,0.019093332688013713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,768,0.004193066557248434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,4096,0.02335466742515564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,768,0.017754666010538735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,512,0.0037248000502586366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,512,0.01654293338457743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,3072,0.018523732821146645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,256,0.003366400053103765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,3584,0.02141973376274109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,256,0.016290133198102318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,128,0.0030879999200503034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,128,0.01554026703039805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,2560,0.016540799538294473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,64,0.003049599876006444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,2048,0.013999999562899271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,64,0.015378133455912272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,128,32,0.002959999938805898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,128,32,0.014859732985496522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,65536,0.07173013687133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,16384,0.022107734282811483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,1536,0.011852799852689107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,65536,0.5012415885925293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,1024,0.009302399555842082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,16384,0.13680319786071776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,12288,0.017963733275731406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,10240,0.012449066837628682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,12288,0.10517120361328125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,8192,0.010890666643778484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,10240,0.08889280160268148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,8192,0.06945919990539551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,7168,0.009701333443323771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,7168,0.0590282678604126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,6144,0.009141332904497783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,6144,0.04897600015004476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,5120,0.00844266712665558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,5120,0.04032426675160726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,4096,0.007609599828720092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,4096,0.03323306639989217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,3584,0.006954666475454967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,768,0.008130133152008057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,3584,0.031085866689682006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,3072,0.006525866687297821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,3072,0.028564266363779706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,512,0.007014399766921997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,2560,0.006404266754786174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,256,0.006230400005976359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,2560,0.026179200410842894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,2048,0.005755733450253805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,2048,0.023925334215164185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,1536,0.005006933212280273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,768,0.01806400020917257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,1536,0.021715199947357176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,1024,0.004211199780305227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2048,128,128,0.005389866729577383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,1024,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,768,0.003953066716591517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,512,0.003517866631348928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,512,0.016774400075276693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,256,0.003337600082159042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,256,0.015829333662986757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,128,0.002995199958483378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,128,0.015056000153223673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,64,0.002833066632350286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,64,0.014812800288200378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,64,32,0.002792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,64,32,0.014893866578737893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,65536,0.07173333168029786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,16384,0.02158720095952352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,12288,0.01573013365268707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,16384,0.135915740331014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,65536,0.4994847933451335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,12288,0.10420160293579102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,10240,0.010670933127403259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,8192,0.00997440020243327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,10240,0.08775359789530436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,8192,0.06791253089904785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,7168,0.009242666761080424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,7168,0.05831573406855265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,6144,0.008417066931724549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,6144,0.04833600123723348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,5120,0.007693866888682048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,5120,0.03914026816685994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,4096,0.00699946681658427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,4096,0.03374933401743571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,3584,0.006773333251476288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,3584,0.030824534098307294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,2048,0.023792000611623128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,3072,0.0063274666666984555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,3072,0.02844799955685933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,2560,0.0060703997810681665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,2560,0.026077866554260254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,2048,0.0054848000407218935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,1536,0.004820266862710317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,1536,0.021229867140452066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,1024,0.004270933568477631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,512,0.01673706571261088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,1024,0.019061332941055296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,768,0.003912533322970072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,768,0.017729065815607705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,512,0.003576533248027166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,256,0.0031871999303499854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,256,0.015844266613324484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,128,0.0030378667016824085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,128,0.014917332927385965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,64,0.0027456000447273255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,64,0.014760532975196838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2048,32,32,0.002906666696071625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2048,32,32,0.014665599664052328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,12288,1.2210484822591146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,16384,1.5902411142985025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,12288,1.483786646525065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,10240,1.99858881632487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,12288,2.4822921752929688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,10240,1.0005813598632813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,16384,3.317950948079427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,8192,0.8162538528442382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,8192,1.635086949666341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,8192,0.9738858540852865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,7168,0.7257898966471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,7168,1.3860202789306642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,6144,0.6206666946411132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,6144,1.1818548838297525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,6144,0.7114858627319336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,5120,0.9628181457519531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,5120,0.5219637235005696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,16384,1.994162114461263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,4096,0.8019776026407877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,4096,0.4235413233439128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,3584,0.6434901555379231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,3584,0.4613919893900554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,3072,0.5671498616536458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,10240,1.2044116973876953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,3072,0.32329066594441735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,2560,0.48985919952392576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,2560,0.28437865575154625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,7168,0.8305269241333008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,2048,0.22542400360107423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,2048,0.38889598846435547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,1536,0.28091840744018554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,1536,0.18586026827494304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,1024,0.19777386983235676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,5120,0.5728864034016927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,1024,0.13054719765981038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,4096,0.4761173248291016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,768,0.1545514742533366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,3584,0.42039146423339846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,768,0.11060586770375569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,512,0.11279786427815755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,512,0.0837503989537557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,3072,0.3633354822794596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,256,0.0668778657913208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,256,0.0668170690536499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,2560,0.3121013323465983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,128,0.05446826616923014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,128,0.06252693335215251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,2048,0.2640885353088379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,64,0.04938559929529826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,64,0.0632863998413086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,65536,32,0.04967679977416992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,65536,32,0.06273706754048666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,1536,0.2177824020385742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,1024,0.16847467422485352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,768,0.1490666707356771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,16384,0.7051722844441731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,512,0.12458559672037761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,16384,0.4255690574645996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,256,0.1040981372197469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,65536,1.8498143513997394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,12288,0.31978559494018555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,12288,0.5222261428833008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,65536,128,0.09803307056427002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,65536,2.052624003092448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,10240,0.2677482604980469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,10240,0.42796907424926756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,10240,0.2944106737772624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,8192,0.3452927907307943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,8192,0.23423892656962075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,8192,0.23723840713500977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,7168,0.3010474522908529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,65536,3.189539082845052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,7168,0.20593813260396324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,7168,0.2162090619405111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,6144,0.2610058625539144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,6144,0.17119146982828776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,5120,0.22370239893595376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,5120,0.14816853205362956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,4096,0.17669013341267903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,4096,0.12130773067474365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,3584,0.158405335744222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,3584,0.1086623986562093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,3072,0.13487787246704103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,3072,0.09635093212127685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,16384,0.4589813232421875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,12288,0.35008427302042644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,2560,0.11768533388773601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,2560,0.08950080076853434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,2048,0.09849173227945963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,2048,0.07645119825998942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,2048,0.07322879632314047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,6144,0.1747605323791504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,1536,0.07248533566792806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,5120,0.14887146949768065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,1536,0.0590229352315267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,1024,0.0509493350982666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,1024,0.04785919984181722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,4096,0.12521493434906006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,1024,0.04813119967778524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,768,0.04077440102895101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,768,0.04226239919662476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,3584,0.11107306480407715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,768,0.04306986729303996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,512,0.031195733944574994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,512,0.03584853410720825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,512,0.03615466753641765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,256,0.02178986668586731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,256,0.029956267277399702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,256,0.03027733365694682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,128,0.01705066760381063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,3072,0.0971893310546875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,128,0.027056000630060834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,64,0.013623467087745667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,64,0.02829440037409465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,16384,32,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,2560,0.08389120101928711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,16384,32,0.027484800418217974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,16384,0.33308693567911785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,16384,0.517080529530843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,16384,0.34424638748168945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,1536,0.061166934172312414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,12288,0.38824211756388344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,12288,0.2562421321868896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,65536,1.306055450439453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,10240,0.3263061205546061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,10240,0.21244586308797203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,10240,0.22226452827453613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,8192,0.17363093694051107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,8192,0.257370662689209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,65536,2.3655967712402344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,7168,0.22938987414042153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,7168,0.15527572631835937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,16384,128,0.028424533208211263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,6144,0.21256532669067382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,6144,0.14061439832051595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,5120,0.1642848014831543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,5120,0.1258581320444743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,5120,0.11698880195617675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,4096,0.13315733273824054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,4096,0.10112960338592529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,4096,0.09747626781463622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,3584,0.11744533379872639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,3584,0.08964266777038574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,3072,0.10204586982727051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,3072,0.07879253228505453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,2560,0.08809066613515218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,2560,0.06890559991200765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,12288,0.2598752021789551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,2560,0.06496106783548991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,65536,1.5474868774414063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,2048,0.0701632022857666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,2048,0.06046613454818726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,1536,0.054273064931233725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,1536,0.05020800034205118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,8192,0.18007893562316896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,1024,0.03974826733271281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,7168,0.15553812980651854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,1024,0.04089920123418172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,6144,0.13704214096069336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,768,0.03226666649182637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,768,0.03608640034993489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,512,0.02463679909706116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,512,0.03128746747970581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,512,0.02823573350906372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,256,0.01757226586341858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,256,0.02730026642481486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,3584,0.08494719664255777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,128,0.013964800039927163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,128,0.023642667134602866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,3072,0.07483733495076497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,64,0.011122133334477742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,64,0.025521065791447955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,12288,32,0.011823999881744384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,12288,32,0.024711465835571288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,2048,0.057248000303904215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,1536,0.0469589352607727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,1024,0.03730346759160359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,65536,1.1487115224202475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,16384,0.4288362820943196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,768,0.0332042674223582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,16384,0.2887477238972982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,65536,1.8634357452392578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,12288,0.3378368059794108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,65536,1.2837674458821615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,12288,0.21664959589640298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,256,0.023732266823450723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,12288,0.23447252909342448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,10240,0.2830581347147624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,12288,128,0.021756800015767415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,10240,0.19709866841634113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,8192,0.21941653887430826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,8192,0.15525439580281575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,7168,0.1884010632832845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,7168,0.13904107411702474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,6144,0.1657248020172119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,6144,0.11662720044453938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,6144,0.11662080287933349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,5120,0.13961386680603027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,5120,0.10036053657531738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,4096,0.11274453004201253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,4096,0.08481386502583822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,3584,0.09712213675181071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,3584,0.07647039890289306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,3584,0.07161493301391601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,3072,0.08416639963785807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,16384,0.29177920023600257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,3072,0.06876160303751627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,2560,0.07128533522288004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,3072,0.06570666631062826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,10240,0.1885866641998291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,2560,0.05975893338521322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,2560,0.05498666763305664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,2048,0.05874880154927572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,8192,0.15089599291483563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,2048,0.052059733867645265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,1536,0.046198399861653645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,1536,0.04352213144302368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,7168,0.13357653617858886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,1536,0.04085760116577149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,1024,0.032232532898585006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,1024,0.032638933261235556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,1024,0.03488959868748982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,768,0.025857067108154295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,768,0.031243733565012616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,5120,0.09853546619415283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,512,0.01975040038426717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,512,0.027265065908432008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,4096,0.08132586479187012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,512,0.024280534187952677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,256,0.014232533176740012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,256,0.023730132977167764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,128,0.010216533144315084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,128,0.021603200833002725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,64,0.00906773308912913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,64,0.021523199478785195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,10240,32,0.009071999788284301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,10240,32,0.021436800559361778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,16384,0.34208854039510095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,2048,0.0483573317527771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,16384,0.24749652544657388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,65536,0.9670613606770834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,16384,0.2356010595957438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,65536,1.4941460927327475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,12288,0.25787199338277184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,768,0.02879040042559306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,12288,0.20736533800760903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,10240,0.22903787295023598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,10240,0.1620576063791911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,8192,0.17502613067626954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,8192,0.1345461368560791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,256,0.0204202671845754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,7168,0.154092804590861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,10240,128,0.018730666240056357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,7168,0.11988159815470378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,6144,0.1329749345779419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,6144,0.09970666567484537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,6144,0.09526080290476481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,5120,0.11148373285929362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,5120,0.08067306677500406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,5120,0.08636053403218588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,4096,0.09033173720041911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,4096,0.07333652973175049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,3584,0.07973653475443522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,3584,0.06613333225250244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,3072,0.06785386403401693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,3072,0.059470931688944496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,65536,0.970700772603353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,2560,0.06626240015029908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,12288,0.1783626715342204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,2560,0.054790401458740236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,10240,0.15161172548929852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,2048,0.047838934262593585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,2048,0.04434560139973958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,8192,0.12459413210550944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,1536,0.036158935228983564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,1536,0.03687040011088054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,7168,0.10976426601409912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,1024,0.025908267498016356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,1024,0.03062719901402791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,768,0.02104533314704895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,4096,0.06723732948303222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,768,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,3584,0.05986560185750326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,512,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,512,0.024291199445724488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,3072,0.05324159860610962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,256,0.01169706682364146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,256,0.021630932887395225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,2560,0.0459935983022054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,128,0.00846613347530365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,2048,0.0408405343691508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,128,0.019911466042200725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,64,0.007342933118343354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,1536,0.03351253271102905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,64,0.019744000832239785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,8192,32,0.007707733412583668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,1024,0.026553599039713542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,8192,32,0.019828265905380248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,768,0.024027733008066814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,65536,0.9075712203979492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,65536,1.3349834442138673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,512,0.020409599939982096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,16384,0.3246901194254557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,16384,0.23578240076700846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,12288,0.24720427195231118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,12288,0.17954773902893068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,10240,0.20507520039876304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,256,0.01721386710802714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,10240,0.15101760228474934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,8192,0.1650752067565918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,8192,0.12410026391347248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,8192,128,0.015734400351842245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,7168,0.14485972722371418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,7168,0.11085440317789715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,6144,0.12487573623657226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,6144,0.09783146381378174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,5120,0.10362880229949951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,5120,0.08421973387400308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,12288,0.18552212715148925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,4096,0.08545706272125245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,4096,0.0709706703821818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,65536,0.9885642369588217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,3584,0.07974932988484701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,16384,0.24421547253926595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,3584,0.06848106384277344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,6144,0.09500373204549153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,3072,0.06690666675567628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,3072,0.05645866791407267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,10240,0.1564575990041097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,2560,0.055048533280690516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,2560,0.04981546799341838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,7168,0.11170559724171955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,2048,0.04508800109227498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,2048,0.042821331818898516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,1536,0.034858667850494386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,1536,0.03628053267796834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,8192,0.12617386976877848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,1024,0.02534293333689372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,1024,0.030308266480763752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,5120,0.0801034688949585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,768,0.020525866746902467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,768,0.027292799949645997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,3072,0.051319468021392825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,512,0.015983999768892924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,512,0.024537599086761473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,4096,0.06660799980163574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,256,0.010980266332626342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,256,0.020973867177963255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,1536,0.02967039942741394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,128,0.0081194669008255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,128,0.0194815993309021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,2560,0.044649600982666016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,64,0.006897066533565521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,3584,0.05838826497395834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,64,0.019540266195933024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,7168,32,0.007590400179227193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,7168,32,0.019569067160288493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,768,0.02032853364944458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,2048,0.03731093406677246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,65536,0.7882901509602864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,16384,0.26705706914265953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,1024,0.0240447998046875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,65536,1.1369280497233072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,16384,0.21057066917419434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,12288,0.20225920677185058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,12288,0.15887467066446942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,10240,0.16954239209493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,10240,0.13085652987162272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,10240,0.1201365311940511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,8192,0.1338762601216634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,8192,0.10679466724395752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,7168,0.1194111982981364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,7168,0.09591360092163086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,7168,0.08603200117746988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,256,0.013435733318328858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,6144,0.10316373507181804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,512,0.017064533631006875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,6144,0.08446826934814453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,6144,0.07471360365549723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,5120,0.08375679651896159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,5120,0.07363093694051107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,7168,128,0.011725866794586181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,4096,0.06973013083140055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,4096,0.06192213296890259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,16384,0.18702826499938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,65536,0.7343722661336263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,3584,0.06162773370742798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,3584,0.05571413437525431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,12288,0.14188480377197266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,3072,0.053478399912516274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,3072,0.05006080071131388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,2560,0.04500159819920858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,2560,0.04314560095469157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,2560,0.03664746681849162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,8192,0.09733760356903076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,2048,0.03648746808369954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,2048,0.0374944011370341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,1536,0.028369067112604777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,1536,0.03182613253593445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,1024,0.020312533775965372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,1024,0.02717546621958415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,5120,0.06380266745885213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,768,0.016798933347066246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,4096,0.05321493148803711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,768,0.02490880091985067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,512,0.013084800044695536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,3584,0.04817386468251546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,512,0.021885865926742555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,256,0.008943999807039898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,512,0.016569599509239197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,3072,0.04232426484425862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,256,0.019725867112477622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,128,0.007229866584142049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,128,0.018542933464050292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,128,0.012985600034395852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,64,0.006182399888833364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,2048,0.03236266573270162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,64,0.018950400749842326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,6144,32,0.006381866832574208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,6144,32,0.018847999970118205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,1536,0.026781866947809856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,16384,0.2115114688873291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,16384,0.176254940032959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,1024,0.021564799547195434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,65536,0.6700639724731445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,65536,0.8417472203572591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,12288,0.16209813753763835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,12288,0.13206720352172852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,10240,0.13562134106953938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,10240,0.11274027029673259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,768,0.019688532749811808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,8192,0.10854506492614746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,8192,0.09421439965566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,7168,0.09577173391977946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,7168,0.0852181355158488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,6144,256,0.013833600282669067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,6144,0.08277013301849365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,6144,0.07509653568267823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,5120,0.06784640153249105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,5120,0.06537493467330932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,12288,0.1425439993540446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,4096,0.05627306699752808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,4096,0.0549397349357605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,16384,0.18846507072448732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,65536,0.7820778528849284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,3584,0.04955413341522217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,6144,0.07408533096313477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,3584,0.05323520104090372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,10240,0.12222399711608886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,3072,0.043525334199269614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,3072,0.04548480113347371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,2560,0.0368618647257487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,2560,0.03951893250147502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,7168,0.08612693150838216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,2048,0.030536532402038574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,2048,0.034254932403564455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,1536,0.02390186587969462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,1536,0.0297760009765625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,8192,0.09977386792500814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,1024,0.017900800704956053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,1024,0.025619200865427655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,5120,0.06350080172220865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,768,0.014514133334159851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,768,0.022406399250030518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,3072,0.04005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,512,0.011276800433794658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,4096,0.052045865853627526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,512,0.021396267414093017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,256,0.00811413327852885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,256,0.01926400065422058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,1536,0.023861332734425863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,128,0.006704000135262807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,128,0.017783466974894205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,2560,0.03546346823374431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,64,0.005859200159708659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,64,0.017963733275731406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,5120,32,0.006106666723887126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,3584,0.04670399824778239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,5120,32,0.017873066663742065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,2048,0.02961066762606303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,1024,0.01970240076382955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,65536,0.6652160008748372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,65536,0.5815807978312175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,16384,0.17098347345987958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,768,0.01611306667327881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,16384,0.15434880256652833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,12288,0.12515199979146321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,12288,0.11632853349049885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,10240,0.10787306626637776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,10240,0.09918399651845297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,256,0.011013333002726238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,128,0.00970240036646525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,8192,0.08701226711273194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,8192,0.0827018658320109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,5120,512,0.013339733084042868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,8192,0.07031359672546386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,65536,0.5341141382853191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,16384,0.13531732559204102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,7168,0.07552533149719239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,7168,0.07439253330230713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,12288,0.10204586982727051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,6144,0.0664405345916748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,6144,0.06671786308288574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,5120,0.05495253403981527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,5120,0.05717973311742147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,10240,0.08723946412404379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,4096,0.04549013376235962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,4096,0.048767999807993574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,3584,0.03983466625213623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,3584,0.04285973310470581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,3584,0.035674667358398436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,3072,0.034679468472798666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,3072,0.038671998182932536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,2560,0.02964800000190735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,2560,0.0348416010538737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,2560,0.027293866872787474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,2048,0.024621866146723428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,2048,0.031070933739344282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,1536,0.01962560017903646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,1536,0.02694186568260193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,1024,0.014892799655596414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,1024,0.023744000991185506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,7168,0.06233386596043905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,768,0.01223466694355011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,6144,0.054946132500966395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,768,0.02118933399518331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,5120,0.04670399824778239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,512,0.009404800335566203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,512,0.019530665874481202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,4096,0.03926080067952474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,256,0.0070154666900634766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,256,0.018040533860524496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,128,0.005894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,3072,0.031226666768391927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,128,0.01738986571629842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,64,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,64,0.017177599668502807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,2048,0.023996800184249878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,4096,32,0.005448533097902933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,4096,32,0.017115734020868936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,1536,0.020300799608230592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,65536,0.6378751754760742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,1024,0.01646506687005361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,65536,0.5475946426391601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,16384,0.154585599899292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,16384,0.14208426475524902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,768,0.014618666966756186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,12288,0.11680320103963215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,12288,0.10804159641265869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,512,0.012663466731707254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,10240,0.09735679626464844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,10240,0.09270826975504556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,256,0.010800000031789143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,8192,0.07862613201141358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,8192,0.07778879801432291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,4096,128,0.01009386678536733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,7168,0.06920106410980224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,7168,0.06943039894104004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,7168,0.06341653267542521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,6144,0.0594101349512736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,6144,0.06176640192667643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,5120,0.04948480129241943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,5120,0.052254935105641685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,4096,0.03991359869639079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,4096,0.043562666575113936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,65536,0.5444469451904297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,3584,0.035589333375295004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,16384,0.13978133201599122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,3584,0.039740800857543945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,12288,0.10509013334910075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,3072,0.030853333075841267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,3072,0.03600746790568034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,3072,0.030449066559473676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,2560,0.025964800516764325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,10240,0.08912639617919922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,2560,0.03198826710383097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,2048,0.021336533625920615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,8192,0.07077013651529948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,2048,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,1536,0.01685973405838013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,1536,0.02505600055058797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,6144,0.05494720141092936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,1024,0.012434132893880208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,5120,0.04666453202565511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,1024,0.021461333831151327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,768,0.010246400038401287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,768,0.019764266411463418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,3584,0.03423360188802083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,512,0.007762133578459422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,512,0.018689066171646118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,2560,0.026557866732279462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,256,0.0064735998709996535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,2048,0.02259626587231954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,256,0.017570134003957114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,4096,0.03895999987920125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,128,0.0051584000388781226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,128,0.016825600465138753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,1536,0.018504534165064493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,64,0.004891733328501383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,64,0.017157334089279174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3584,32,0.005184000233809153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3584,32,0.01699519952138265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,1024,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,65536,0.5231722513834636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,65536,0.5046463966369629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,16384,0.13141547044118246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,768,0.012824533383051553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,16384,0.14662399291992187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,12288,0.099126402537028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,12288,0.10040319760640462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,10240,0.08308800061543783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,10240,0.08684906959533692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,8192,0.06754346688588461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,512,0.010974933703740437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,8192,0.07270399729410806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,7168,0.059539198875427246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,256,0.008980266253153483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,7168,0.06552106539408366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3584,128,0.008059733112653096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,6144,0.05141866604487101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,6144,0.058372267087300624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,5120,0.04355200131734212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,5120,0.04912000099817912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,12288,0.09162666797637939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,4096,0.034593065579732254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,4096,0.040948267777760824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,65536,0.47045119603474933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,16384,0.1234239975611369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,3584,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,3584,0.03845226764678955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,3072,0.027210666735966997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,10240,0.0779807964960734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,3072,0.034137598673502606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,2560,0.023283199469248454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,2560,0.031035733222961426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,6144,0.04886399904886882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,2048,0.019332265853881835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,2048,0.02762239972750346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,7168,0.056345601876576744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,1536,0.015583999951680503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,1536,0.023880533377329507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,8192,0.06520959933598837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,1024,0.011839999755223592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,1024,0.021894399325052896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,5120,0.04297066529591878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,768,0.009499733646710713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,768,0.019992534319559732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,4096,0.034689064820607504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,512,0.007606400052706401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,3072,0.027480532725652058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,512,0.01858133276303609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,256,0.005937066674232483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,256,0.017464532454808553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,1536,0.01694719990094503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,3584,0.032102400064468385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,128,0.0050240000089009605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,128,0.01627306640148163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,2560,0.024541866779327393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,64,0.0045962666471799215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,64,0.016323199868202208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,3072,32,0.004773333172003428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,3072,32,0.016683733463287352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,768,0.012103466192881267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,65536,0.4318688074747722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,65536,0.47362133661905925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,2048,0.021156267325083414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,16384,0.11258453528086346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,1024,0.01360426644484202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,16384,0.12262293497721355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,12288,0.08519039948781332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,10240,0.07133333683013916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,12288,0.09390400250752767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,10240,0.08179413477579753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,8192,0.05761173168818155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,8192,0.06885333061218261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,256,0.009124267101287841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,7168,0.050025598208109534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,512,0.010972799857457478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,7168,0.06097386678059896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,6144,0.043340798219045004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,3072,128,0.00809386670589447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,6144,0.05337599913279215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,5120,0.036880000432332354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,5120,0.046401067574818926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,12288,0.08633600076039633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,4096,0.030861866474151612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,4096,0.03863573471705119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,65536,0.44475625356038406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,16384,0.11502400239308674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,3584,0.027474133173624675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,3584,0.035811201731363936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,3072,0.023921066522598268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,10240,0.07242879867553711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,3072,0.03281919956207276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,2560,0.020734934012095134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,2560,0.029785599311192828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,8192,0.05963626702626547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,2048,0.017668267091115318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,6144,0.04482666651407878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,7168,0.052484265963236486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,2048,0.02709653377532959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,1536,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,1536,0.024538666009902954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,1024,0.011059199770291645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,1024,0.021269333362579346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,5120,0.03915946483612061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,768,0.009301333626111349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,4096,0.03222080071767171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,3072,0.02531519929567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,768,0.01993066668510437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,512,0.0075765331586201985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,512,0.0188810666402181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,256,0.005678933362166087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,256,0.017080533504486083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,3584,0.029364265998204547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,128,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,128,0.016375466187795003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,2560,0.022911999622980753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,64,0.004358399907747904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,64,0.016709333658218382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2560,32,0.004694400231043497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,1536,0.015537066260973611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2560,32,0.01660053332646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,65536,0.3403861363728841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,65536,0.42139733632405596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,16384,0.08853546778361002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,16384,0.11086400349934895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,1024,0.013008000453313193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,2048,0.019319466749827065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,12288,0.06586666504542033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,12288,0.08465813000996908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,10240,0.0564138650894165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,10240,0.07287253538767496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,256,0.008251733581225077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,512,0.009940266609191895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,8192,0.0443391998608907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,128,0.007439999779065449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,8192,0.0596010684967041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2560,768,0.01123520036538442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,8192,0.04456959962844849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,65536,0.3171488126118978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,7168,0.04013653198877971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,7168,0.05231039921442667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,6144,0.03403733174006145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,6144,0.04582826693852742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,16384,0.0845749298731486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,5120,0.028859732548395793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,5120,0.04012906551361084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,4096,0.02376213272412618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,4096,0.03438933293024699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,12288,0.06352426608403525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,4096,0.025121066967646283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,3584,0.021282132466634116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,3584,0.03182400067647298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,3072,0.01858453353246053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,10240,0.054022399584452305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,3072,0.029658667246500653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,2560,0.016315733393033348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,2560,0.026594134171803792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,2048,0.013753599921862283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,2048,0.024569600820541382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,1536,0.01116480032602946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,1536,0.0214303990205129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,7168,0.0395520011583964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,1024,0.00844586690266927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,6144,0.03454080025355021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,1024,0.019617066780726115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,5120,0.029452800750732422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,768,0.007332266867160797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,768,0.018604799111684164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,768,0.009992532928784688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,512,0.00613013356924057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,512,0.017748266458511353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,3584,0.02296533385912577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,256,0.004891733328501383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,256,0.016804265975952148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,3072,0.020268799861272176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,128,0.00432640016078949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,128,0.01612160007158915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,2560,0.01790613333384196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,64,0.0038677332301934562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,2048,0.015247999628384908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,64,0.01599253316720327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,2048,32,0.004102399945259095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,2048,32,0.016127999623616537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,1536,0.012888532876968384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,65536,0.26324480374654136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,16384,0.06675093173980713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,65536,0.3783583958943685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,16384,0.10040960311889649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,12288,0.05090879996617635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,1024,0.010844799876213073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,12288,0.07725439866383871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,10240,0.0426634669303894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,512,0.00906986693541209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,10240,0.0653162678082784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,8192,0.03472426732381185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,256,0.007720533510049183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,8192,0.052891735235850015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,7168,0.03068586587905884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,2048,128,0.007338666419188182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,7168,0.04665173292160034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,7168,0.036841599146525066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,6144,0.0264682670434316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,6144,0.04203306833902995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,6144,0.0315338671207428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,5120,0.022565333048502605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,5120,0.03654719988505046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,4096,0.01877546707789103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,65536,0.2963552157084147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,4096,0.031933865944544476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,16384,0.0772320032119751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,3584,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,3584,0.029656533400217695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,3072,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,12288,0.05865600109100342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,3072,0.027140265703201293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,2560,0.013175466656684875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,2560,0.02568746606508891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,2048,0.010940800110499065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,10240,0.049981868267059325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,2048,0.02241920034090678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,8192,0.0421120007832845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,1536,0.009004799524943034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,1536,0.020181334018707274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,1024,0.007158400118350982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,1024,0.019767467180887857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,5120,0.028119466702143353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,768,0.006246399879455566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,4096,0.02285760045051575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,768,0.018083200852076212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,768,0.008893866340319316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,3584,0.02119893431663513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,512,0.005399466554323832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,512,0.01698346734046936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,3072,0.01829013427098592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,256,0.004422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,256,0.016032000382741295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,128,0.0039327998956044516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,2560,0.01623679995536804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,128,0.015532799561818442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,64,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,64,0.01556373337904612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1536,32,0.0037450666228930154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,2048,0.013853866855303446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1536,32,0.015212800105412802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,65536,0.17888320287068685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,1536,0.011680000027020772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,65536,0.33404267628987633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,16384,0.05036373138427734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,16384,0.094706130027771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,1024,0.009636267026265462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,12288,0.039740800857543945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,12288,0.06948053042093913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,10240,0.03161386648813884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,10240,0.0588757316271464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,8192,0.028417066733042402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,8192,0.04779520034790039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,512,0.008130133152008057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,256,0.006817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,7168,0.02291626731554667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,7168,0.042327467600504556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1536,128,0.006411733229955037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,6144,0.020266666014989217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,6144,0.03757013479868571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,5120,0.018114133675893148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,5120,0.03368106683095296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,65536,0.24309333165486655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,16384,0.06296533346176147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,4096,0.015714133779207863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,12288,0.04831146796544393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,4096,0.028515199820200603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,3584,0.013745066523551942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,3584,0.02635626594225566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,3072,0.011749333143234253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,3072,0.024829866488774617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,8192,0.03415786822636922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,2560,0.010053333640098572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,2560,0.023281067609786987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,7168,0.029769599437713623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,6144,0.02637760043144226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,2048,0.008327466746171314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,2048,0.02152000069618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,5120,0.02333546678225199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,1536,0.007785599927107494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,1536,0.019950934251149497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,10240,0.04144320090611776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,1024,0.005734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,1024,0.018234666188557944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,768,0.005331199864546458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,768,0.017493333419164023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,4096,0.019368533293406168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,512,0.0047189335028330484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,512,0.016572800278663636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,3072,0.015611732999483744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,256,0.003980800012747447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,256,0.015533866484959922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,3584,0.01777600049972534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,128,0.003583999971548716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,128,0.01551040013631185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,2048,0.012285866340001424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,64,0.003304533412059148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,64,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,1024,32,0.0034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,2560,0.014212266604105631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,1024,32,0.015372799833615622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,65536,0.13925226529439289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,1024,0.00872320036093394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,65536,0.3116832097371419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,16384,0.03949013153711955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,16384,0.08958720366160075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,12288,0.030562132596969604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,12288,0.06552106539408366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,1536,0.010875733693440755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,10240,0.027461334069569902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,768,0.008258133133252462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,10240,0.05528959830602011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,512,0.007614933451016744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,8192,0.024583466847737632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,8192,0.045084798336029054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,256,0.006626133124033611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,7168,0.02164906660715739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,7168,0.03924800157546997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,1024,128,0.006140799820423126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,6144,0.016323199868202208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,6144,0.034601600964864095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,5120,0.015397333105405173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,5120,0.03113493323326111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,12288,0.04782400131225586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,16384,0.062609068552653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,65536,0.24107306798299155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,4096,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,4096,0.025868799289067584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,3584,0.010984533031781514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,3584,0.024369066953659056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,3072,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,3072,0.024074665705362954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,10240,0.0409877339998881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,2560,0.008180266618728638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,2560,0.022383999824523926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,6144,0.02568639914194743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,7168,0.029423999786376952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,8192,0.033618132273356124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,2048,0.0072053333123524976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,2048,0.020422399044036865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,1536,0.006243200103441874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,1536,0.018849066893259683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,1024,0.0052821333209673565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,1024,0.01840426723162333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,5120,0.022232532501220703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,768,0.004731733103593191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,768,0.016633599996566772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,3072,0.014826666315396628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,4096,0.018631466229756675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,512,0.004187733431657155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,512,0.016203733285268147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,256,0.00359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,3584,0.017012266318003337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,256,0.015531733632087708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,128,0.003356799980004629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,128,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,2560,0.013431466619173684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,1536,0.009629866480827332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,64,0.0030752000709374744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,64,0.014780799547831217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,768,32,0.00327360009153684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,768,32,0.014923733472824097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,65536,0.09628693262736002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,2048,0.01158186693986257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,65536,0.2880917231241862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,1024,0.008158933122952778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,16384,0.02892586588859558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,768,0.007420800129572551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,16384,0.08415573438008626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,12288,0.024663466215133666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,12288,0.06055466731389364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,10240,0.022272000710169472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,10240,0.049437868595123294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,8192,0.018438400824864705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,8192,0.04057173331578572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,512,0.006790400048096975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,256,0.005736533304055532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,7168,0.01581653356552124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,768,128,0.005433600147565206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,7168,0.03697173198064168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,6144,0.013492266337076822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,6144,0.032807467381159465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,5120,0.011823999881744384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,65536,0.21695146560668946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,5120,0.02916373411814372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,4096,0.010449066758155823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,16384,0.0572106679280599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,4096,0.025830399990081788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,12288,0.04409279823303223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,3584,0.009205333391825358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,3584,0.02437333265940348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,10240,0.037514666716257736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,3072,0.008036266764005024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,3072,0.023192532857259116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,2560,0.007594666878382365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,8192,0.03014613389968872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,2560,0.021731199820836385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,2048,0.006615466872851054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,2048,0.020294400056203206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,7168,0.027129600445429485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,1536,0.005625600119431814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,1536,0.018374399344126383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,5120,0.02032853364944458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,1024,0.004756266872088114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,4096,0.017114667097727458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,1024,0.017643733819325765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,768,0.004358399907747904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,768,0.016724266608556113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,3072,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,512,0.003947733342647553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,512,0.01616106629371643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,3584,0.015973333517710367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,256,0.0034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,6144,0.02363413373629252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,256,0.015446399648984274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,128,0.003289599965016047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,2560,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,2048,0.010892800490061442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,128,0.014866133530934652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,64,0.0030847998956839246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,64,0.01492800017197927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,512,32,0.0032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,512,32,0.0150026669104894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,65536,0.05405333439509073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,65536,0.2690474510192871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,16384,0.02038933237393697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,1536,0.00947093367576599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,16384,0.07614506880442301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,12288,0.016593066851298015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,12288,0.05530453523000082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,1024,0.007712000111738841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,768,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,10240,0.014350933829943338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,10240,0.04578453302383423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,8192,0.012745599945386252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,512,0.006718933085600535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,8192,0.037633065382639566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,128,0.0053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,7168,0.011073066790898641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,7168,0.034110931555430095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,6144,0.010202667117118836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,6144,0.031726932525634764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,65536,0.20814933776855468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,5120,0.009704533219337463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,5120,0.028622933228810626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,16384,0.05531946818033854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,4096,0.008958933750788371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,4096,0.025521065791447955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,12288,0.041766401131947836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,3584,0.008282666901747386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,3584,0.024411733945210776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,512,256,0.0059562668204307554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,3072,0.007368533313274384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,3072,0.023244800170262654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,10240,0.03521920045216878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,2560,0.007152000069618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,8192,0.02899199922879537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,2560,0.02170133392016093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,2048,0.006468266745408376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,7168,0.02566293279329936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,2048,0.020078933238983153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,1536,0.00581333339214325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,6144,0.02241493264834086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,1536,0.018595200777053834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,1024,0.0048437332113583885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,1024,0.017308799425760905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,5120,0.019569067160288493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,768,0.004362666606903076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,768,0.016833066940307617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,4096,0.016459733247756958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,512,0.0037429332733154297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,512,0.016320000092188515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,3584,0.015406933426856995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,256,0.0034175999462604523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,256,0.015211733182271323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,2048,0.010426666339238484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,2560,0.012221866846084594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,128,0.0030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,1536,0.009168000022570292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,128,0.014820266763369241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,1024,0.007628799974918365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,64,0.002869333326816559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,64,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,256,32,0.0030346666773160298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,256,32,0.014595199624697366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,65536,0.04321706692377726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,3072,0.013492266337076822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,65536,0.2068298657735189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,16384,0.011749333143234253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,65536,0.26153599421183266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,16384,0.07088747024536132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,12288,0.010414933164914448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,768,0.007123200098673503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,12288,0.05039893388748169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,10240,0.009382399916648864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,512,0.006348800162474315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,10240,0.042795733610788984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,8192,0.009172266721725464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,8192,0.0360149343808492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,7168,0.008740267157554627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,7168,0.03324906627337138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,6144,0.008158933122952778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,6144,0.03023359974225362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,256,0.005542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,16384,0.055650134881337486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,5120,0.0075647999842961625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,256,128,0.005258666475613912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,5120,0.028920533259709676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,4096,0.007062399884064992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,12288,0.04173333247502645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,4096,0.025363200902938844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,10240,0.03562560081481934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,3584,0.0077461332082748415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,3584,0.023875200748443605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,3072,0.007319466769695282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,8192,0.028870399792989093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,3072,0.02286720077196757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,2560,0.006382933259010315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,7168,0.02580159902572632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,2560,0.02108479936917623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,2048,0.005735466877619425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,2048,0.019803732633590698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,1536,0.005003733436266581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,6144,0.022618667284647623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,1536,0.018260266383488974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,1024,0.004311466713746389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,1024,0.017072000106175742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,5120,0.019744000832239785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,768,0.003942399968703588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,4096,0.01657919983069102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,768,0.01667413314183553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,512,0.003509333233038584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,3584,0.015383467078208923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,512,0.01601920028527578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,3072,0.013529599706331889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,256,0.003127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,2560,0.01218986709912618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,256,0.01509119967619578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,128,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,128,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,64,0.002690133452415466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,64,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,1536,0.009245866537094116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,128,32,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,128,32,0.014345600207646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,1024,0.007507200042406718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,65536,0.03934719959894816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,16384,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,2048,0.010354133447011311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,12288,0.009408000111579894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,16384,0.06922986507415771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,65536,0.25878613789876304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,10240,0.008709333340326945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,10240,0.040642134348551434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,12288,0.04867093165715535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,8192,0.034519465764363606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,768,0.00710399995247523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,8192,0.008110933502515157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,7168,0.007684266567230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,6144,0.007222400108973186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,512,0.006502399841944377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,7168,0.03178133368492127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,4096,0.006402133405208588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,6144,0.029641600449879964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,5120,0.0068256000677744556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,5120,0.028198399146397907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,4096,0.025098667542139692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,256,0.005529599885145823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,3584,0.006712533533573151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,3584,0.02371946573257446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,3072,0.006413866579532623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1024,128,128,0.005086933573087057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,1536,0.0056991999348004665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,3072,0.02225066622098287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,2560,0.006287999947865804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,2560,0.021606399615605672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,2048,0.005580799778302511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,2048,0.020347734292348228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,1536,0.01846826672554016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,1024,0.004497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,1024,0.018055466810862224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,768,0.004013866682847341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,768,0.01676373283068339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,512,0.003573333223660787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,512,0.015797332922617594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,256,0.0032768001159032187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,256,0.015124266346295675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,128,0.0030634666482607523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,128,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,64,0.002993066608905792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,64,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,64,32,0.0029813334345817565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,64,32,0.014803199966748556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,65536,0.037514666716257736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,16384,0.010194133718808491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,65536,0.256712532043457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,16384,0.06883520285288493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,12288,0.008742400010426839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,12288,0.04795413414637248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,10240,0.008026666442553202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,10240,0.04036159912745158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,8192,0.007152000069618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,8192,0.03402560154596965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,7168,0.006695466736952464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,7168,0.03227413296699524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,6144,0.0061034664511680605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,6144,0.029448533058166505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,5120,0.006459733347098033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,5120,0.02757973273595174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,4096,0.006144000093142191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,4096,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,3584,0.006625066697597504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,3584,0.024251733223597208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,3072,0.006268799801667531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,3072,0.022806400060653688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,2560,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,2560,0.021221333742141725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,2048,0.005513600011666616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,2048,0.01943146586418152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,1536,0.004835199813048045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,1536,0.017882666985193887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,1024,0.004110933343569437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,1024,0.016901334126790367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,768,0.003685333331425985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,768,0.017030400037765504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,512,0.003475199888149897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,512,0.015829333662986757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,256,0.0030730667213598887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,256,0.014855466286341348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,128,0.0027722666660944624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,128,0.014713600277900696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,64,0.0026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,64,0.014782933394114175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1024,32,32,0.002721066772937775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1024,32,32,0.014382933576901754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,16384,1.1752875010172525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,12288,0.892635726928711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,12288,1.6613428751627606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,16384,2.3368863423665363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,10240,0.7119306564331055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,10240,1.4228747049967447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,8192,1.112380854288737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,8192,0.5811594645182292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,7168,0.5047701199849446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,7168,0.9514912287394205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,6144,0.4404042561848958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,6144,0.8037301381429037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,5120,0.662608019510905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,5120,0.3801173210144043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,4096,0.5290634791056316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,4096,0.31135571797688805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,12288,1.089249038696289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,10240,0.9389066696166992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,16384,1.5227914174397787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,3584,0.4981813430786133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,3584,0.28538452784220375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,3072,0.24177813529968262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,3072,0.4210176150004069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,2560,0.3415850639343262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,7168,0.6259328206380208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,2560,0.2230954647064209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,2048,0.2761024157206217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,5120,0.4347221374511719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,8192,0.7237024307250977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,2048,0.17029867172241211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,6144,0.5473141352335612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,1536,0.1364192008972168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,1536,0.2195061365763346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,1024,0.147543462117513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,1024,0.1055840015411377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,768,0.11478613217671711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,768,0.08705706596374511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,4096,0.3649258613586426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,512,0.08393920262654622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,512,0.06643626689910889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,2560,0.24009280204772948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,3584,0.32912321090698243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,256,0.05306560198465983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,256,0.05477973222732544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,2048,0.20114026069641114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,128,0.04151360193888347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,3072,0.2830677350362142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,128,0.05033813317616781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,64,0.0407914678255717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,64,0.05156906843185425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,65536,32,0.0391264001528422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,65536,32,0.05188373327255249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,1536,0.16702826817830402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,768,0.11451413631439208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,16384,0.5143253326416015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,1024,0.12749653657277424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,65536,1.324786122639974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,16384,0.3209770520528158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,12288,0.25440425872802735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,65536,2.3563690185546875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,12288,0.38999573389689124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,10240,0.3303413391113281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,10240,0.2352992057800293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,8192,0.2558293342590332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,256,0.07872959772745768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,8192,0.16843946774800617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,128,0.07448639869689941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,65536,512,0.09383359750111898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,7168,0.22501759529113768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,7168,0.14851199785868327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,6144,0.19400533040364581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,6144,0.13100480238596598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,5120,0.16459306081136066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,5120,0.1129599968592326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,10240,0.2243669350941976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,12288,0.26778345108032225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,65536,1.5520960489908853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,4096,0.1350752035776774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,16384,0.36384105682373047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,4096,0.0974250634511312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,3584,0.1190293312072754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,3584,0.08969066937764486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,3072,0.10328959623972575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,3072,0.07829333146413167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,2560,0.08779306411743164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,2560,0.06709012985229493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,7168,0.161407995223999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,5120,0.11326933701833089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,2048,0.07154133319854736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,2048,0.05760000149408976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,1536,0.05484266678492228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,8192,0.18459092775980632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,1536,0.048987734317779544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,1024,0.03948160012563069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,1024,0.03973333438237508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,6144,0.134769074122111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,768,0.0326581339041392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,768,0.035684267679850265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,2560,0.0640725334485372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,512,0.024752000967661537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,4096,0.0973909298578898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,512,0.0312885324160258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,256,0.01750826636950175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,256,0.026766933997472125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,3584,0.08492266337076823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,128,0.013809067010879517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,2048,0.05534293254216512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,128,0.02495253284772237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,64,0.011572266618410747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,3072,0.07508479754130046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,64,0.024755199750264488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,1536,0.046539731820424396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,16384,32,0.011601066589355469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,16384,32,0.02451733350753784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,768,0.033056000868479415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,16384,0.3727146784464518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,65536,0.9801727930704752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,1024,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,16384,0.25363945960998535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,65536,1.7311028798421222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,12288,0.2841973304748535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,12288,0.1916373411814372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,10240,0.23860479990641276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,10240,0.15763840675354004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,8192,0.18908586502075195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,8192,0.1297162691752116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,128,0.021313067277272543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,256,0.023336533705393472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,7168,0.1704842726389567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,7168,0.11626880168914795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,16384,512,0.02791573405265808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,6144,0.1470304012298584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,6144,0.10353386402130127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,5120,0.12153813044230144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,5120,0.08927146593729654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,12288,0.23398186365763346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,4096,0.10001813570658366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,4096,0.07538452943166098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,65536,1.329748280843099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,16384,0.304311466217041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,3584,0.10832106272379557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,3584,0.07011199792226155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,10240,0.19078507423400878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,3072,0.07528213659922281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,3072,0.06057279904683431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,6144,0.11701013247172039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,2560,0.06467306613922119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,2560,0.053471998373667395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,8192,0.1536234696706136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,7168,0.13713599840799967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,2048,0.05149226586023966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,2048,0.046215466658274335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,1536,0.040888532002766924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,1536,0.03909120162328084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,1024,0.02903573314348857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,1024,0.0326581339041392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,5120,0.09873920281728109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,768,0.023937066396077476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,768,0.02951573332150777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,4096,0.08118826548258463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,512,0.018221867084503175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,512,0.025890133778254193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,3072,0.06094933350880941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,256,0.012584533294041952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,256,0.021934932470321654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,3584,0.07138986587524414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,128,0.009498666723569233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,1536,0.03545386791229248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,128,0.020942932367324828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,2560,0.05441813468933106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,64,0.008161066472530365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,64,0.020822399854660036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,12288,32,0.008447999755541485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,12288,32,0.02092906634012858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,2048,0.044395732879638675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,65536,0.8617109298706055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,768,0.023525333404541014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,65536,1.3965365091959634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,1024,0.029788800080617267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,16384,0.3385610580444336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,16384,0.21732692718505858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,12288,0.2508010705312093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,12288,0.17182933489481608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,10240,0.20983893076578775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,10240,0.144541867574056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,512,0.01948053240776062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,8192,0.17017706235249835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,8192,0.11911786397298177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,256,0.01577279965082804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,7168,0.14945813814798992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,7168,0.10475413004557292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,6144,0.12620800336201984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,6144,0.09266560077667237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,12288,128,0.01359999974568685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,5120,0.10778133074442546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,5120,0.08103360335032145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,12288,0.19307519594828287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,4096,0.08577386538187662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,4096,0.06894826889038086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,16384,0.26161386171976725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,65536,1.1011530558268228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,3584,0.07608959674835206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,3584,0.0729429324467977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,3072,0.06751786867777507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,10240,0.16596906979878742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,6144,0.10028693675994874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,3072,0.05775146484375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,2560,0.0558357318242391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,2560,0.0507861336072286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,2048,0.045737600326538085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,7168,0.11923200289408367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,2048,0.04252586762110393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,1536,0.035461334387461345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,1536,0.03630826473236084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,8192,0.13744853337605795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,1024,0.025624533494313557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,1024,0.029502934217453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,5120,0.0875274658203125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,768,0.02076479991277059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,768,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,3072,0.05363413492838541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,4096,0.06946132977803549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,512,0.01653439998626709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,512,0.026759467522303265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,256,0.012967466314633688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,1536,0.03145920038223267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,256,0.02365866700808207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,128,0.010912000139554342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,128,0.022882133722305298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,2560,0.04736640055974324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,64,0.010785067081451416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,3584,0.0627840002377828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,64,0.023256532351175942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,10240,32,0.011255466938018798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,10240,32,0.023293866713841756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,2048,0.040625067551930745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,1024,0.02613760034243266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,65536,0.717241605122884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,65536,1.0634602864583333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,16384,0.2700576146443685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,16384,0.1838912010192871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,12288,0.2006282647450765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,12288,0.14092052777608235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,768,0.02169493238131205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,10240,0.16967147191365559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,10240,0.1204266627629598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,256,0.01504746675491333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,8192,0.13260266780853272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,128,0.013090133666992188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,8192,0.09955413341522217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,10240,512,0.01790613333384196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,7168,0.11649386882781983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,7168,0.09036906560262045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,6144,0.1031061331431071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,6144,0.0803434689839681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,5120,0.08642666339874268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,5120,0.06933013598124185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,65536,0.7737034479777019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,4096,0.08278720378875733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,4096,0.0629472017288208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,12288,0.1395466645558675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,10240,0.11892266273498535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,16384,0.1873685359954834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,3584,0.06037333408991495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,3584,0.052475734551747644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,3072,0.05229973395665487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,3072,0.047169065475463866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,2560,0.044750932852427164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,2560,0.04183146556218465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,7168,0.08315839767456054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,2048,0.03575466473897298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,5120,0.06117653449376424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,2048,0.036408531665802005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,1536,0.02808213432629903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,1536,0.03192639946937561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,8192,0.0990933338801066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,1024,0.020733867088953653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,1024,0.026923733949661254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,6144,0.07400639851888022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,768,0.016782933473587038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,768,0.024618667364120484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,2560,0.03560959895451864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,4096,0.05199786822001139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,512,0.013186132907867432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,512,0.02174506584803263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,256,0.009179733196894328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,256,0.01960960030555725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,3584,0.04605653285980225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,128,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,128,0.018908800681432088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,3072,0.04097386598587036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,64,0.006215466558933258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,2048,0.03147413333257039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,64,0.01884160041809082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,8192,32,0.006602666775385539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,8192,32,0.01890666683514913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,1536,0.026173865795135497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,65536,0.6455637613932292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,65536,0.9203423817952474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,16384,0.22883307139078773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,768,0.019313067197799683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,16384,0.18167680104573566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,12288,0.17486507097880047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,12288,0.13200106620788574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,10240,0.14603840510050456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,10240,0.1098965326944987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,256,0.01362559994061788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,1024,0.02167146603266398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,8192,0.11745493412017823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,8192,0.09059840043385824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,128,0.012689066926638284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,8192,512,0.01573973298072815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,7168,0.10206293265024821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,7168,0.08113813400268555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,6144,0.08823253313700358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,6144,0.0721173365910848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,5120,0.07450239658355713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,5120,0.062717866897583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,12288,0.142902406056722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,4096,0.06100053389867147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,4096,0.05332479874293009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,65536,0.7419530868530273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,16384,0.18570133845011394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,3584,0.06467946767807006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,3584,0.048102398713429764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,3072,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,3072,0.04289386669794719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,10240,0.11939413547515869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,2560,0.03888320128122966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,6144,0.07153386274973551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,2560,0.03774720033009847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,2048,0.03174613316853841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,2048,0.03355093399683635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,7168,0.08537706534067789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,1536,0.024780799945195518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,1536,0.029139200846354168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,8192,0.09947413603464762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,1024,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,1024,0.024618667364120484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,5120,0.06175680160522461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,3072,0.03922346830368042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,768,0.014619732896486918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,768,0.02272319992383321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,4096,0.05087146759033203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,512,0.011291733384132386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,512,0.021314134200414024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,256,0.008144000172615051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,256,0.019244800011316933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,3584,0.0450272003809611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,128,0.007288533449172974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,1536,0.02355626622835795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,128,0.018989866971969603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,2560,0.03543786605199178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,64,0.006118399898211161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,64,0.018756266434987387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,7168,32,0.0065087998906771345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,7168,32,0.01890773375829061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,65536,0.7421173095703125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,65536,0.5621983846028645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,2048,0.028973867495854694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,1024,0.01955946683883667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,16384,0.1874069372812907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,16384,0.146615473429362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,12288,0.14164907137552898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,12288,0.1133237361907959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,10240,0.12013119856516521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,10240,0.0974069356918335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,256,0.010824533303578694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,8192,0.09700373013814291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,128,0.009541333715120951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,8192,0.08123199939727783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,512,0.01297599971294403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,7168,0.0853823979695638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,7168,0.07360640366872152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,7168,768,0.015849600235621132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,6144,0.0730293353398641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,6144,0.06511679887771607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,5120,0.061179733276367186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,5120,0.056968533992767335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,12288,0.12284053166707357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,4096,0.04989653428395589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,4096,0.04861119985580444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,16384,0.1651583989461263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,3584,0.044761598110198975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,65536,0.67357972462972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,3584,0.04329813321431478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,3072,0.038815999031066896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,3072,0.03925120035807292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,10240,0.10418559710184734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,2560,0.032917332649230954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,2560,0.035447466373443606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,6144,0.063209601243337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,2048,0.027140265703201293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,2048,0.03097493251164754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,7168,0.07430400053660074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,1536,0.021498666206995646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,1536,0.02805440028508504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,8192,0.08595413366953532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,1024,0.016285866498947144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,1024,0.023269333442052207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,5120,0.05456426541010538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,768,0.013464533289273582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,768,0.021576533714930214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,3072,0.03537280162175496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,512,0.010447999835014344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,512,0.02025173306465149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,4096,0.04498879909515381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,256,0.007718400160471599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,256,0.018463999032974243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,3584,0.040693334738413495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,128,0.00629013329744339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,128,0.01755626598993937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,2560,0.031292800108591715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,64,0.005463466544946035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,64,0.017595734198888144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,6144,32,0.005801600217819214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,6144,32,0.01771199901898702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,1536,0.021811199188232423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,65536,0.6575146357218424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,65536,0.5177119890848796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,16384,0.1675381342569987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,2048,0.026735999186833698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,16384,0.1367509365081787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,1024,0.018074667453765868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,12288,0.12678613662719726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,12288,0.10450987021128337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,768,0.015075199802716575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,10240,0.10637013117472331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,10240,0.08974400361378988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,8192,0.08445119857788086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,512,0.012364799777666729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,256,0.010917333761850993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,8192,0.07550186316172282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,7168,0.0738922675450643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,7168,0.06824213663736979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,6144,128,0.009739733735720317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,6144,0.06420586506525675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,6144,0.060686933994293216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,5120,0.053554133574167884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,5120,0.053284267584482826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,12288,0.10728212992350261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,4096,0.04444160064061482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,4096,0.04501226743062337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,16384,0.14035092989603679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,65536,0.5563594818115234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,3584,0.03933440049489339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,3584,0.04161706765492757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,3072,0.03455253442128499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,6144,0.0547871987024943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,10240,0.09078613122304281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,3072,0.03716906706492106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,2560,0.02937813401222229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,2560,0.03349119822184245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,2048,0.02476693391799927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,2048,0.029717334111531574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,7168,0.0641759991645813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,1536,0.01957119901974996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,1536,0.02643199960390727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,8192,0.07440106868743897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,1024,0.014802133043607077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,1024,0.023167999585469563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,5120,0.04760533173878988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,3072,0.030771199862162275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,768,0.012266666690508524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,768,0.02135573426882426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,512,0.009646933277448018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,4096,0.0392031987508138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,512,0.019686400890350342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,256,0.007084799806276958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,256,0.01795626680056254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,1536,0.019233065843582153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,128,0.005883733431498209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,128,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,2560,0.027641600370407103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,64,0.005523199836413065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,64,0.0170197327931722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,3584,0.03539520104726156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,5120,32,0.005649066468079885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,5120,32,0.016816000143686928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,65536,0.5302613258361817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,2048,0.0237226665019989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,65536,0.44274133046468095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,16384,0.13247040112813313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,16384,0.11632853349049885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,1024,0.015808000167210897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,12288,0.09961067040761312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,12288,0.09132266839345296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,10240,0.0844170649846395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,10240,0.07951467037200928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,256,0.009761066238085429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,128,0.008809600273768108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,8192,0.0668234666188558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,8192,0.06672639846801758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,512,0.010999466975529988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,7168,0.058723199367523196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,7168,0.05957546631495157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,6144,0.0513589342435201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,5120,768,0.013236266374588013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,6144,0.05268373489379883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,5120,0.04330026706059774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,5120,0.04520426591237386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,12288,0.08758506774902344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,4096,0.03508053223292033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,4096,0.03856319983800252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,16384,0.11647253036499024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,65536,0.4644330660502116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,3584,0.031098665793736775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,3584,0.03782293399175008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,3072,0.02781013250350952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,3072,0.033421866099039715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,10240,0.07390613555908203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,2560,0.023253333568573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,2560,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,6144,0.04568746487299601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,2048,0.019324799378712974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,2048,0.0268394668896993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,7168,0.05302293300628662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,1536,0.015714133779207863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,1536,0.023308799664179484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,8192,0.06047253211339315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,1024,0.011736533045768738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,1024,0.021143466234207153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,5120,0.039692799250284835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,768,0.009593600034713745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,768,0.019721599419911702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,3072,0.02626986702283223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,4096,0.03283626635869344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,512,0.00786240001519521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,512,0.01858453353246053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,256,0.006041599810123444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,256,0.017164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,2560,0.023149865865707397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,3584,0.02948906620343526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,128,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,128,0.016590933005015053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,64,0.004462933540344239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,64,0.016315733393033348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,4096,32,0.004665599763393402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,4096,32,0.016402133305867515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,1536,0.017040000359217326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,65536,0.45534187952677413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,65536,0.41130558649698895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,2048,0.020245333512624107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,16384,0.1129034678141276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,16384,0.10796693166097004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,1024,0.013644799590110779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,12288,0.08706986904144287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,12288,0.08488106727600098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,768,0.011684266726175944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,10240,0.07377280394236246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,256,0.00876693328221639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,10240,0.07404053211212158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,512,0.009937066833178203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,4096,128,0.0081535999973615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,8192,0.059774935245513916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,8192,0.0625055988629659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,7168,0.052648532390594485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,7168,0.056949333349863684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,6144,0.04554133415222168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,6144,0.04980053504308064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,5120,0.03868800004323324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,5120,0.04267093340555827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,65536,0.4470207850138347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,12288,0.0852170705795288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,16384,0.11221226851145427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,4096,0.03176640073458354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,4096,0.03699626525243123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,3584,0.028033065795898437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,3584,0.03470293283462524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,3072,0.02444480061531067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,3072,0.03192960023880005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,10240,0.07135252952575684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,6144,0.044818135102589925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,2560,0.021057067314783733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,2560,0.028593067328135175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,2048,0.018051199118296304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,7168,0.051380264759063723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,2048,0.026024534304936724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,1536,0.014800000190734863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,8192,0.059759998321533205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,1536,0.02446613311767578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,1024,0.011571199695269267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,1024,0.02077546715736389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,5120,0.03911679983139038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,768,0.009495466947555542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,768,0.01994880040486654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,3072,0.025646932919820148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,4096,0.03232959906260173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,512,0.0076885332663853955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,512,0.018601600329081217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,256,0.006155733267466227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,256,0.017460266749064125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,1536,0.016296533743540446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,3584,0.029155200719833373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,128,0.005365333457787832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,128,0.01697280009587606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,2560,0.023082667589187623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,64,0.004807466765244802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3584,32,0.005026133358478546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,64,0.017127466201782227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3584,32,0.01700906753540039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,65536,0.3863423983256022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,65536,0.390177059173584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,2048,0.019883733987808228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,1024,0.013290666540463767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,16384,0.10054612954457601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,16384,0.10022613207499187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,12288,0.07591679890950521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,12288,0.07864853541056314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,768,0.011373866597811382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,10240,0.06374400059382121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,10240,0.06881706714630127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,8192,0.051577599843343105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,8192,0.0580021341641744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,256,0.008499200145403545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,512,0.009900800387064616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3584,128,0.007733333110809326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,7168,0.04416319926579793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,7168,0.05223893324534098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,6144,0.038262399037679036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,6144,0.04533439874649048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,5120,0.032780800263086954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,5120,0.03916586637496948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,12288,0.07784319718678792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,4096,0.02699199914932251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,4096,0.03427733182907104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,16384,0.1059114694595337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,65536,0.4296863873799642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,3584,0.023897600173950196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,3584,0.03254186709721883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,10240,0.06462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,3072,0.021826134125391642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,3072,0.029226666688919066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,2560,0.018011732896169027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,2560,0.02662293314933777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,8192,0.05347520112991333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,6144,0.04064000050226847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,2048,0.015380266308784484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,2048,0.02412373423576355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,1536,0.012460800011952718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,7168,0.047261865933736165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,1536,0.02190720041592916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,5120,0.03509226640065511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,1024,0.009465600053469341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,1024,0.019745065768559774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,768,0.007885866860548655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,768,0.01871253252029419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,4096,0.029306666056315107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,3072,0.02327466607093811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,512,0.006545066833496094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,512,0.017837866147359212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,256,0.005127466718355815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,256,0.016822399695714314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,3584,0.026761599381764728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,2560,0.02137813369433085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,128,0.004650666813055674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,128,0.016156799594561257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,64,0.004125866790612539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,64,0.015845333536465965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,3072,32,0.004226133227348328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,3072,32,0.016365866859753928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,1536,0.014568533500035605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,65536,0.33905706405639646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,2048,0.018042665719985963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,65536,0.3634911855061849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,16384,0.0869119962056478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,16384,0.09384426275889078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,12288,0.06616746584574382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,12288,0.0748255968093872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,1024,0.01227946678797404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,768,0.010673066973686219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,10240,0.054381867249806724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,256,0.008155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,512,0.009142399827639262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,10240,0.06447679996490478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,3072,128,0.007507200042406718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,8192,0.044232531388600664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,8192,0.05407786766688029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,7168,0.03877653280893962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,7168,0.047888000806172684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,6144,0.03368000189463298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,6144,0.04191146691640218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,5120,0.028675200541814168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,5120,0.03708159923553467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,12288,0.06283200184504191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,4096,0.023639466365178427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,4096,0.032740267117818196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,65536,0.3184981346130371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,3584,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,16384,0.08329493204752604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,3584,0.030988800525665283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,3072,0.01876586675643921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,3072,0.028220800558725993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,6144,0.033802668253580734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,2560,0.016350932916005454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,2560,0.025938133398691814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,10240,0.052840534845987955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,2048,0.013707733154296875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,2048,0.024168533086776734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,7168,0.03884906768798828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,1536,0.011457066734631855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,1536,0.021517866849899293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,8192,0.043958401679992674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,1024,0.008603733777999879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,1024,0.019207467635472618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,5120,0.02914239962895711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,768,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,768,0.01844373345375061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,3072,0.01967466672261556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,4096,0.024280534187952677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,512,0.006084266801675161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,512,0.01752106746037801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,256,0.004957866668701172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,256,0.01625706652800242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,3584,0.022204800446828207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,128,0.00425600012143453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,1536,0.01244586706161499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,128,0.015710933009783427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,64,0.004011733333269755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,2560,0.017862399419148765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,64,0.015595733126004537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2560,32,0.0039893334110577905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2560,32,0.015537066260973611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,65536,0.26102399826049805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,65536,0.32393067677815757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,16384,0.07617706457773844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,16384,0.09275946617126465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,2048,0.01532373329003652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,1024,0.010529067118962605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,12288,0.058830932776133216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,12288,0.06891733010609945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,10240,0.04854079882303874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,768,0.009099733829498292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,10240,0.05979839960734049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,8192,0.04185386498769124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,8192,0.05023573239644369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,256,0.007165866593519847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,7168,0.03537173271179199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,7168,0.0440287987391154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,512,0.007864533364772797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2560,128,0.006434133152167003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,6144,0.029988267024358112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,6144,0.038754133383433025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,5120,0.024943999449412026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,5120,0.03366080125172933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,12288,0.05309546788533529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,4096,0.021262933810551964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,65536,0.2710826555887858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,4096,0.029587199290593464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,3584,0.01935360034306844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,3584,0.027537065744400024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,10240,0.04504640102386474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,16384,0.072326397895813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,3072,0.016810667514801026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,3072,0.02653440038363139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,2560,0.01328213314215342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,2560,0.023874133825302124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,7168,0.0331989328066508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,2048,0.011307733257611592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,8192,0.03764479955037435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,2048,0.021241599321365358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,6144,0.028520532449086505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,1536,0.009484799702962239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,1536,0.019882667064666747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,1024,0.007228800157705943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,1024,0.018768000602722167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,5120,0.024906667073567708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,4096,0.021100799242655434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,768,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,3072,0.01744640072186788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,768,0.017644800742467246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,512,0.005211733281612396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,512,0.016951467593510947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,256,0.0044159998496373495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,256,0.01602133313814799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,3584,0.019177599747975668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,1536,0.011061333616574605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,128,0.0038474666575590765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,128,0.015546666582425437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,64,0.0036138666172822317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,64,0.015683199961980185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,2560,0.015200000007947287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,2048,32,0.00382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,2048,32,0.015668267011642457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,65536,0.20022826194763182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,65536,0.2904746691385905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,16384,0.05759359995524088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,2048,0.012916266918182373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,1024,0.009435733159383137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,16384,0.0789685328801473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,12288,0.04515626827875773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,12288,0.0623583992322286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,10240,0.03935253222783407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,10240,0.05270613431930542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,256,0.006623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,128,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,8192,0.028819199403127032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,512,0.007276799778143566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,8192,0.04277546803156535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,7168,0.025849600632985432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,7168,0.039230934778849286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,2048,768,0.008393599589665731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,6144,0.02222933371861776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,6144,0.03493119875590007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,5120,0.0191210667292277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,5120,0.033291733264923094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,65536,0.2611658732096354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,12288,0.04848426580429077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,4096,0.016135467092196147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,16384,0.06291306813557943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,4096,0.03015679915746053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,3584,0.014504533012708029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,3584,0.02749333381652832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,3072,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,3072,0.025731199979782106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,10240,0.04103680054346721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,2560,0.012630400061607362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,2560,0.023421865701675416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,6144,0.02609066764513652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,2048,0.010822400450706482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,2048,0.021755733092625937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,7168,0.029832533995310467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,1536,0.008779733379681905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,8192,0.03410880168279012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,1536,0.020104533433914183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,1024,0.007076266904671986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,1024,0.017959467569986978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,5120,0.022957867383956908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,768,0.006078933179378509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,768,0.017414400974909462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,3072,0.015869866808255514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,4096,0.019381332397460937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,512,0.005093333125114441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,512,0.016406400005022685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,256,0.004154666761557261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,256,0.016119466225306193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,3584,0.01753386656443278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,128,0.0036799999574820197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,128,0.015662933389345803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,2560,0.014004266262054444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,1536,0.01046399970849355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,64,0.00342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,64,0.015545599659283958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1536,32,0.0035189333061377203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1536,32,0.015024000406265258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,2048,0.012019200126330058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,65536,0.13779840469360352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,1024,0.008938666184743245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,65536,0.2550133387247721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,16384,0.039426132043202715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,16384,0.08053013483683268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,12288,0.03091520071029663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,768,0.008016000191370647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,12288,0.06079146862030029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,10240,0.03838293155034383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,10240,0.050120532512664795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,256,0.006298666695753734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,8192,0.031541333595911665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,512,0.006929066777229309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1536,128,0.005794133245944977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,8192,0.04267413218816121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,7168,0.026829866568247478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,7168,0.038038400808970134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,6144,0.02359893321990967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,6144,0.03400426705678304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,5120,0.01995519995689392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,5120,0.02851733366648356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,12288,0.04270506699879964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,65536,0.25463892618815104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,16384,0.057929599285125734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,4096,0.017846399545669557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,4096,0.02491413354873657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,3584,0.015121066570281982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,3584,0.023996800184249878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,3072,0.012873599926630655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,3072,0.02276159922281901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,10240,0.0356874664624532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,2560,0.008963200449943542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,6144,0.022900267442067464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,2560,0.02073919971783956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,7168,0.02593066692352295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,8192,0.029873067140579225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,2048,0.007226666808128357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,2048,0.019653334220250448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,1536,0.006433066725730896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,1536,0.018913066387176512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,1024,0.005297066768010458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,1024,0.017366399367650352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,5120,0.01994880040486654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,768,0.004858666658401489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,768,0.01683733264605204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,3072,0.013613866766293845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,4096,0.01690346598625183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,512,0.004278400043646494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,512,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,256,0.0037109332780043284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,3584,0.01513920029004415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,256,0.015666133165359496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,128,0.0033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,2560,0.0121888001759847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,128,0.015035733580589294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,1536,0.009075199564297993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,64,0.0031968000034491217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,64,0.015284267067909241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,1024,32,0.0032650666932264962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,1024,32,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,2048,0.010632533828417461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,65536,0.1053653319676717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,16384,0.032118399937947587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,65536,0.23815360069274902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,16384,0.06811199982961019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,768,0.007162666817506154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,12288,0.025843199094136553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,12288,0.05137706597646078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,1024,0.008010666569073994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,10240,0.023285333315531412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,10240,0.04323519865671794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,8192,0.019045333067576088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,8192,0.0364682674407959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,256,0.005627733469009399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,512,0.006276266773541768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,7168,0.016816000143686928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,1024,128,0.005354666709899902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,7168,0.03314773241678874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,6144,0.014983466267585755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,6144,0.030009599526723225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,5120,0.013261866569519044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,5120,0.02913813392321269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,65536,0.20898879369099937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,12288,0.04049386580785115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,16384,0.05346773465474447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,4096,0.011204266548156738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,4096,0.025949867566426595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,3584,0.01015786627928416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,3584,0.024337067206700643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,3072,0.009175466497739156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,10240,0.03445333242416382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,3072,0.023490132888158162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,2560,0.009877333045005798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,2560,0.021814399957656862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,8192,0.028946133454640706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,6144,0.022316799561182658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,2048,0.008869333068529765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,7168,0.025639466444651288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,2048,0.020645334323247274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,1536,0.007371733089288075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,1536,0.019076265891393027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,5120,0.019489065806070963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,1024,0.005764266848564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,1024,0.017166932423909508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,768,0.005070933202902476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,768,0.01656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,3072,0.013159466783205667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,4096,0.016012799739837647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,512,0.004401066899299621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,512,0.016239999731381734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,256,0.0034933333595593774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,3584,0.014820266763369241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,256,0.01566933294137319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,128,0.003268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,2560,0.011907200018564861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,128,0.015122133493423461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,64,0.003067733347415924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,64,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,768,32,0.0032000000278155005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,768,32,0.015086932977040609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,65536,0.07589546839396158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,65536,0.2220832029978434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,1536,0.00892799993356069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,16384,0.02443839907646179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,2048,0.010593066612879436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,16384,0.06342186530431113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,12288,0.01890773375829061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,12288,0.047601068019866945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,768,0.0070709332823753355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,1024,0.00788373351097107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,10240,0.01694186727205912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,10240,0.04082133372624715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,8192,0.016389333208402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,512,0.006213333209355672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,8192,0.03477226495742798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,256,0.005602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,7168,0.014761599898338317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,768,128,0.0053045332431793215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,7168,0.031965865691502886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,6144,0.013170133034388224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,6144,0.029123200972874956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,5120,0.011422933141390482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,5120,0.026002132892608644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,65536,0.19957440694173176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,4096,0.009872000416119893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,4096,0.024012800057729086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,16384,0.051482665538787845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,12288,0.03829653263092041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,3584,0.008796800176302593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,3584,0.02307093342145284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,3072,0.008070399860541026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,3072,0.021277866760889688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,10240,0.033267199993133545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,2560,0.007209600011507671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,6144,0.020823466777801513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,8192,0.027357866366704304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,2560,0.01994026700655619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,2048,0.0065087998906771345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,7168,0.024349866310755412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,2048,0.018564265966415406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,1536,0.005700266857941946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,1536,0.018228266636530557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,1024,0.004600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,1024,0.017116800944010416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,5120,0.01798080007235209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,768,0.0042581334710121155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,768,0.01704960068066915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,4096,0.015431466698646545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,3072,0.01253973344961802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,512,0.0038730666041374207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,3584,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,512,0.01611733337243398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,256,0.00342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,256,0.015009066462516785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,128,0.0031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,128,0.015159466862678527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,2560,0.011587199568748475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,64,0.003014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,1536,0.008601599931716919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,64,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,512,32,0.002996266633272171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,2048,0.009905067086219788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,512,32,0.014837333559989929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,65536,0.045261867841084796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,1024,0.0075989335775375364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,65536,0.20755093892415366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,16384,0.014382933576901754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,16384,0.056459732850392665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,12288,0.01211840013662974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,12288,0.042156799634297686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,768,0.00673280010620753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,10240,0.013333333532015481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,10240,0.036525865395863846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,8192,0.01167039970556895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,256,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,512,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,8192,0.03265173236529033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,7168,0.010621866583824158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,512,128,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,7168,0.030313599109649658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,6144,0.009806933005650838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,6144,0.028051199515660603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,65536,0.16912213961283368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,5120,0.00927786628405253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,16384,0.04463040033976237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,5120,0.02550400098164876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,4096,0.008888533711433411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,12288,0.03396799961725871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,4096,0.023638399442036946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,3584,0.008004266520341237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,3584,0.02210986614227295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,10240,0.02879146734873454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,3072,0.007214933137098948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,3072,0.021157334248224892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,2560,0.006424533327420552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,2560,0.01996586720148722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,8192,0.023578667640686037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,2048,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,2048,0.018681599696477254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,7168,0.021154133478800456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,1536,0.0049898669123649595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,6144,0.01869866649309794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,1536,0.017525333166122436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,1024,0.004264533519744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,1024,0.01676586667696635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,5120,0.01625920037428538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,768,0.003952000041802724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,768,0.016297599673271178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,4096,0.01411626636981964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,512,0.003603200117746989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,512,0.015892266233762106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,3072,0.011784533659617107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,256,0.00323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,3584,0.013156267007191977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,256,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,128,0.0029567999144395193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,128,0.014751999576886495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,2560,0.01083733340104421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,64,0.002765866617361705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,64,0.014679466684659323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,256,32,0.002828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,256,32,0.014467199643452963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,65536,0.037257599830627444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,2048,0.009173333644866943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,65536,0.20124373435974122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,16384,0.011421866218249003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,1024,0.007079466680685679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,16384,0.051717332998911535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,1536,0.008426666259765625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,12288,0.009553066889444987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,12288,0.03836053212483724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,10240,0.008697600166002909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,10240,0.03515199820200603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,768,0.006401066482067108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,512,0.0056981335083643595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,8192,0.008020266890525818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,8192,0.030921600262324017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,7168,0.007655466596285502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,256,0.005156266689300537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,7168,0.029040000836054486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,6144,0.007113599777221679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,256,128,0.004849066833655039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,6144,0.026845866441726686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,5120,0.008042666812737782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,5120,0.025304534037907916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,65536,0.16649279594421387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,4096,0.007362133264541626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,4096,0.023079466819763184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,16384,0.044293332099914554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,3584,0.0069248000780741375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,12288,0.03374933401743571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,3584,0.021997867027918498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,3072,0.006593066453933716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,3072,0.021211733420689903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,10240,0.02841599980990092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,8192,0.0236245334148407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,2560,0.006209066510200501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,2560,0.019713066021601357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,2048,0.005657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,2048,0.018475733200709023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,7168,0.021081600586573282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,1536,0.004905599852403005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,6144,0.018662399053573607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,1536,0.017684266964594523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,1024,0.004196266829967499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,5120,0.016238933801651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,1024,0.016772266228993735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,768,0.0037962667644023894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,768,0.016541866461435954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,4096,0.01390720009803772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,512,0.003458133339881897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,512,0.01584106683731079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,3584,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,3072,0.01151146690050761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,256,0.0031445334355036415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,2048,0.00897173285484314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,256,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,2560,0.010728533069292705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,128,0.0029045333464940387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,128,0.014703999956448874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,1024,0.007118933399518331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,64,0.0026986666023731233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,1536,0.0083146666487058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,128,32,0.002721066772937775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,64,0.014257066448529563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,768,0.006404266754786174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,128,32,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,65536,0.03200640082359314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,16384,0.00905386706193288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,65536,0.19837013880411786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,16384,0.049149866898854574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,12288,0.008313600222269695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,12288,0.0378165324529012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,10240,0.0076906666159629825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,8192,0.006929066777229309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,10240,0.03340799808502197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,8192,0.029483733574549358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,7168,0.006675200164318084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,6144,0.00631573349237442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,7168,0.028595199187596638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,6144,0.026049067576726277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,5120,0.00673280010620753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,5120,0.025091199080149333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,4096,0.006258133550484974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,4096,0.02276479999224345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,3584,0.006635733445485433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,3584,0.021826134125391642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,3072,0.006292266647020976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,3072,0.02020373344421387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,2560,0.006186666587988535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,512,0.005559466779232025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,2560,0.019318399826685588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,2048,0.005468800167242686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,2048,0.018210132916768394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,1536,0.004830933113892873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,1536,0.01780160069465637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,1024,0.004125866790612539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,256,0.005275733272234599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,1024,0.016790399948755898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,768,0.0038111999630928038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,768,0.01606933375199636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,512,0.0034229333202044168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,768,128,128,0.004860800007979075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,512,0.015702399611473083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,64,0.014512000481287637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,256,0.003156266609827677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,32,0.014340266585350037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,256,0.014854400356610616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,128,0.002906666696071625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,64,128,0.014706133802731832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,64,0.002624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,64,32,0.0027776000400384264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,12288,0.03634346723556518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,65536,0.03194560011227925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,16384,0.008400000135103862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,65536,0.1974378744761149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,16384,0.0486517349878947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,12288,0.007295999924341838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,10240,0.006871466835339864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,10240,0.03295253316561381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,8192,0.006586666901906331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,8192,0.029554132620493574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,7168,0.006293333570162455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,4096,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,7168,0.028331732749938963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,6144,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,6144,0.026124799251556398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,5120,0.00660159985224406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,5120,0.025029333432515462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,3584,0.0067007998625437425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,4096,0.02290346622467041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,3584,0.021386667092641195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,2048,0.018334933121999106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,3072,0.00628053347269694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,3072,0.020983467499415077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,2560,0.006146133442719777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,2560,0.01967466672261556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,2048,0.005434666574001312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,1536,0.004741333425045013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,1536,0.017910399039586387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,1024,0.004149333387613296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,1024,0.01699840029080709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,768,0.0037248000502586366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,768,0.01617280046145121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,512,0.0033269333342711128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,512,0.01585706671079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,256,0.0029813334345817565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,256,0.014630400141080222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,128,0.0027615999182065325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,128,0.014681599537531533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,64,0.0027285332481066385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,64,0.014595199624697366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,768,32,32,0.0026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,768,32,32,0.014546133081118264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,16384,0.7743637084960937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,12288,0.5717045466105144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,16384,0.9848128000895182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,16384,1.52740478515625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,12288,1.1523573557535807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,10240,0.8941301345825196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,10240,0.5057194709777832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,8192,0.4001973470052083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,8192,0.7733397165934245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,7168,0.6416213353474934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,7168,0.3519189198811849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,6144,0.5874698638916016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,6144,0.3130101203918457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,5120,0.2614410718282064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,5120,0.49773225784301756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,4096,0.36385278701782225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,4096,0.20875733693440757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,12288,0.7022623697916667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,10240,0.5783711751302083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,3584,0.3126869201660156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,3584,0.19204799334208172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,8192,0.4834933280944824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,3072,0.2705162684122721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,3072,0.16553707122802735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,7168,0.42174612681070967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,2560,0.14263787269592285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,2560,0.22768106460571289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,2048,0.18647146224975586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,2048,0.1206506649653117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,1536,0.14229119618733724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,1536,0.09713599681854249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,5120,0.303276793162028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,1536,0.11432106494903564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,1024,0.10306773185729981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,1024,0.07510186831156412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,4096,0.2516095956166585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,1024,0.09048106670379638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,768,0.0787722667058309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,768,0.06382400194803874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,512,0.05757013161977133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,6144,0.36346346537272134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,512,0.05199786822001139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,3584,0.21573972702026367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,256,0.04027200142542521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,256,0.04330346584320068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,128,0.03049280047416687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,3072,0.19304854075113934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,128,0.03927040100097656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,64,0.028189865748087566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,64,0.03977706829706828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,65536,32,0.02714346647262573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,2560,0.1635573387145996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,65536,32,0.040201600392659506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,2048,0.13884053230285645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,16384,0.34665813446044924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,65536,0.8626890818277995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,16384,0.21241493225097657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,65536,1.5386688232421875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,768,0.07948799928029379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,12288,0.26126186052958167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,512,0.06530666748682658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,12288,0.20126719474792482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,10240,0.22050347328186035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,256,0.05507626533508301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,10240,0.14049599965413412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,65536,128,0.051668266455332436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,8192,0.17609386444091796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,8192,0.11536213556925456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,7168,0.1561397393544515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,7168,0.10475306510925293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,6144,0.13455467224121093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,6144,0.09090879758199057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,5120,0.11168106396993001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,5120,0.07886506716410319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,5120,0.07779626846313477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,4096,0.09026986757914225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,65536,0.9868703842163086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,4096,0.0664469321568807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,16384,0.23731840451558434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,3584,0.07954986890157065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,12288,0.18101545969645183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,3584,0.06067413489023844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,3072,0.06975146929423014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,3072,0.05450559854507446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,10240,0.151365327835083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,2560,0.05862186749776205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,8192,0.12207679748535157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,2560,0.04815680185953776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,7168,0.1066805362701416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,2048,0.04770239988962809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,2048,0.0427072008450826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,1536,0.037351465225219725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,6144,0.09446506500244141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,1536,0.03588800032933553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,1024,0.026732800404230754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,1024,0.030228267113367718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,768,0.021425066391626994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,768,0.02685760060946147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,4096,0.0671189308166504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,512,0.016503467162450155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,3584,0.05827626784642538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,512,0.024218666553497314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,3072,0.051666132609049474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,256,0.01165226697921753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,2560,0.044044800599416095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,256,0.021735467513402305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,2048,0.03837546507517497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,128,0.00892799993356069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,128,0.019335466623306274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,1536,0.03328959941864014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,64,0.007336533566315968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,64,0.01983893314997355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,16384,32,0.007686399916807811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,1024,0.02651626666386922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,16384,32,0.019939200083414713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,768,0.023587199052174886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,16384,0.2751008033752441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,65536,0.6943210601806641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,65536,1.1654677073160806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,16384,0.17497493426005045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,12288,0.24656960169474282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,12288,0.13547840118408203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,512,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,10240,0.1822666645050049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,10240,0.11588266690572102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,256,0.01698453426361084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,8192,0.13584319750467938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,8192,0.09562346935272217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,16384,128,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,7168,0.11910719871520996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,7168,0.08560319741566977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,6144,0.1022976001103719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,6144,0.07565120061238607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,5120,0.08841813405354818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,5120,0.06566933393478394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,10240,0.11322986284891765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,4096,0.07033920288085938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,65536,0.7425269444783529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,4096,0.05602346658706665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,12288,0.13809067408243816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,3584,0.06202666759490967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,3584,0.05061653455098471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,3072,0.05463360150655111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,16384,0.1814037322998047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,3072,0.046588798364003495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,7168,0.0836138645807902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,2560,0.04621866544087728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,2560,0.04062826633453369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,5120,0.060652800401051844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,2048,0.037804798285166426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,2048,0.03569706678390503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,1536,0.02927466630935669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,1536,0.03165973424911499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,8192,0.09316266377766927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,6144,0.0704906702041626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,1024,0.020862932999928793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,1024,0.027100799481074016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,768,0.016964266697565712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,768,0.02445759971936544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,2560,0.034746666749318436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,512,0.013201066851615905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,512,0.02184213399887085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,4096,0.05068693161010742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,256,0.009578667084376017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,256,0.01930453379948934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,2048,0.03088853359222412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,3584,0.0452181339263916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,128,0.007051733136177063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,128,0.01831573247909546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,64,0.005980800092220307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,64,0.018594133853912353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,1536,0.026062933603922527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,3072,0.0399946649869283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,12288,32,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,12288,32,0.01894506613413493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,768,0.018860799074172974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,16384,0.2162933349609375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,65536,0.5699690500895183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,16384,0.1479157288869222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,65536,0.87762451171875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,12288,0.16321813265482585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,1024,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,12288,0.12849386533101398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,10240,0.13893653551737467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,10240,0.10398293336232503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,8192,0.11311893463134766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,8192,0.08505813280741373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,128,0.012487467130025227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,512,0.016005333264668783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,7168,0.09922026793162028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,12288,256,0.013678933183352152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,7168,0.07500906785329184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,6144,0.08430186907450357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,6144,0.06681386629740396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,5120,0.07229546705881754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,5120,0.05810986757278443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,12288,0.13120426336924235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,4096,0.056910932064056396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,4096,0.049532798926035564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,65536,0.7168565114339193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,16384,0.17600107192993164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,3584,0.050810666879018154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,3584,0.045305601755778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,10240,0.11335039933522542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,3072,0.04447786808013916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,3072,0.04147200187047322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,6144,0.06852693557739258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,2560,0.037557331720987956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,8192,0.09203200340270996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,2560,0.03691519896189372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,2048,0.031115732590357464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,2048,0.03284479975700379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,7168,0.08186559677124024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,1536,0.02458453377087911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,1536,0.02874026695887248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,1024,0.018081067005793254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,1024,0.02510506709416707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,5120,0.059623467922210696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,768,0.014959999918937683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,768,0.023150932788848878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,4096,0.04774293502171834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,512,0.011818666259447734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,512,0.021025067567825316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,3072,0.037495466073354085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,256,0.008657067020734151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,256,0.018963199853897095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,3584,0.04312746524810791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,128,0.0067114666104316715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,128,0.01807360053062439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,1536,0.02329813241958618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,2560,0.03378986517588298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,64,0.005874133110046387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,64,0.01784106691678365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,10240,32,0.006159999966621399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,10240,32,0.018037333091100057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,768,0.01602666676044464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,65536,0.4780277252197266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,65536,0.7138538360595703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,2048,0.028538666168848675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,1024,0.01944533387819926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,16384,0.17534933090209961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,16384,0.13690026601155597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,12288,0.13201279640197755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,65536,0.5037749290466309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,12288,0.09952106475830078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,10240,0.11107947031656902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,10240,0.08836053212483724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,8192,0.08950826327006021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,8192,0.07227946917215983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,7168,0.07937599817911783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,7168,0.06458773215611777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,256,0.011271466811498005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,512,0.013753599921862283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,6144,0.06724053223927816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,6144,0.056951467196146646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,5120,0.05576533476511637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,5120,0.05004693269729614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,10240,128,0.010154666503270467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,4096,0.04568213224411011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,4096,0.04337600072224935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,16384,0.12258133093516033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,12288,0.09549012978871664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,3584,0.04079573154449463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,3584,0.03924053510030111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,10240,0.07961066563924155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,3072,0.035785599549611406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,3072,0.03578240076700846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,8192,0.0651968002319336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,2560,0.030487465858459472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,2560,0.032509867350260416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,7168,0.05790613492329916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,2048,0.025064533948898314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,2048,0.028897066911061604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,1536,0.0199455996354421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,6144,0.049942398071289064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,1536,0.026037333408991496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,1024,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,5120,0.04154239892959595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,1024,0.02286400000254313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,768,0.01241919994354248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,768,0.020518400271733604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,4096,0.03570559819539388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,512,0.010006399949391682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,3584,0.032817065715789795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,512,0.019378133614857993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,3072,0.028727465867996217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,2560,0.0253493328889211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,256,0.007212799787521362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,256,0.01772693395614624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,128,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,2048,0.022377600272496544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,128,0.017030400037765504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,64,0.005267199873924255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,64,0.01715946594874064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,8192,32,0.005435733497142792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,1536,0.019382399320602418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,8192,32,0.017122133572896322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,1024,0.015878400206565856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,16384,0.1557055950164795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,65536,0.440067195892334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,65536,0.6415914535522461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,768,0.013929599523544311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,16384,0.11500799655914307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,12288,0.12121280034383138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,12288,0.09000639915466309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,10240,0.09860266844431559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,512,0.012105600039164225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,10240,0.07783573468526205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,8192,0.08143040339152018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,256,0.010392533739407857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,8192,0.06594026486078898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,7168,0.06958613395690919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,7168,0.05931946833928427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,65536,0.49631039301554364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,6144,0.06740480264027914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,16384,0.1282314697901408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,12288,0.09599359830220541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,6144,0.055106135209401455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,5120,0.05128000179926554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,5120,0.04625920057296753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,4096,0.041405868530273435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,4096,0.03922239939371745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,8192,128,0.00986346701780955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,10240,0.07934186458587647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,3584,0.03688106536865234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,3584,0.03594559828440348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,3072,0.03204373319943746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,3072,0.03248853286107381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,8192,0.06645546754201254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,2560,0.026649600267410277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,2560,0.029666133721669513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,7168,0.056467199325561525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,2048,0.021993599335352578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,2048,0.02696746587753296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,2048,0.020809600750605263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,1536,0.01720213294029236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,1536,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,1024,0.012838400403658547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,1024,0.021015467246373494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,6144,0.0500928004582723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,768,0.010595200459162395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,5120,0.04317546685536702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,768,0.019115734100341796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,4096,0.03525439898173015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,512,0.008145066599051159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,512,0.017468800147374473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,3584,0.032128000259399415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,256,0.005983999868233999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,3072,0.027672533194224042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,256,0.017145599921544394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,128,0.0051594664653142296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,128,0.016798933347066246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,64,0.004816000163555145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,64,0.01699840029080709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,7168,32,0.005151999990145365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,1536,0.017343999942143758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,7168,32,0.017171200116475424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,1024,0.014784000317255654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,65536,0.5369120279947917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,65536,0.3942751884460449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,768,0.012130133310953776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,16384,0.13496960004170735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,16384,0.10538240273793538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,12288,0.1013258695602417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,12288,0.08306132952372233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,512,0.009861333171526591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,10240,0.08577173550923665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,2560,0.02488213380177816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,10240,0.07150293191274007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,256,0.008533333738644917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,8192,0.06891733010609945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,7168,128,0.007738666733105977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,8192,0.060310399532318114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,7168,0.060568531354268394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,7168,0.055054934819539394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,7168,0.05042879978815714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,6144,0.05223466555277506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,6144,0.04935146570205688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,5120,0.04401386578877767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,5120,0.04410346746444702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,5120,0.037699198722839354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,4096,0.03637866576512654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,4096,0.03705386718114217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,3584,0.03191999991734822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,3584,0.03454613288243612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,65536,0.4305866559346517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,16384,0.11014613310496013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,3072,0.02832213242848714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,12288,0.08095573584238688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,3072,0.030909866094589233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,2560,0.02333866755167643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,2560,0.02847359975179036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,10240,0.0689632018407186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,2048,0.019730132818222047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,8192,0.05762666861216227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,2048,0.026581333080927534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,1536,0.015820800264676412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,1536,0.02339093287785848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,1024,0.012155733505884806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,6144,0.04375253518422444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,1024,0.02151040037473043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,4096,0.03133866588274638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,768,0.010322133700052898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,768,0.01986879905064901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,3072,0.0250218669573466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,512,0.007898666461308797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,2560,0.022258132696151733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,512,0.01816213329633077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,256,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,2048,0.01911146640777588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,256,0.016768000523249307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,1536,0.0162090669075648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,128,0.005066666503747304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,128,0.016245333353678386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,64,0.00443200021982193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,1024,0.013567999998728434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,64,0.016446933150291443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,6144,32,0.004969599843025208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,6144,32,0.01662613352139791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,3584,0.029002666473388672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,768,0.01118933359781901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,65536,0.44623893102010087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,16384,0.11654293537139893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,65536,0.36247145334879555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,16384,0.09560426870981852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,12288,0.08762666384379068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,12288,0.07611947059631348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,10240,0.07380800247192383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,10240,0.06562240123748779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,512,0.009585066636403402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,8192,0.05891199906667074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,8192,0.05525226593017578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,7168,0.05101120074590047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,256,0.008474666873613994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,7168,0.050597333908081056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,6144,128,0.007874133189519246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,6144,0.04424639940261841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,6144,0.04600640137990315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,5120,0.03830826679865519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,5120,0.04000106652577718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,12288,0.07423253059387207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,4096,0.031030400594075518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,16384,0.10049920082092285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,4096,0.03591039975484212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,3584,0.027666133642196656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,3584,0.031996800502141313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,10240,0.06406506697336832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,6144,0.03915199836095174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,3072,0.024385066827138265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,3072,0.030048000812530517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,2560,0.020866133769353232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,7168,0.04580373366673787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,2560,0.02760853370030721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,2048,0.01762346625328064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,2048,0.02598506609598796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,8192,0.05231253306070963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,1536,0.01441493332386017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,65536,0.3969482739766439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,1536,0.023524266481399537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,1024,0.01135040024916331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,1024,0.02039360006650289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,5120,0.03411519924799601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,768,0.009784533580144247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,768,0.01924053430557251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,4096,0.028698666890462236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,3072,0.022268799940745036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,512,0.007618133227030437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,512,0.01842026710510254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,256,0.006152533491452535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,256,0.01718399922053019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,3584,0.026494934161504106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,1536,0.014691199858983359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,2560,0.020533333222071327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,128,0.004941866795221964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,128,0.016458666324615477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,64,0.004465066889921824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,5120,32,0.004678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,64,0.016380799810091655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,5120,32,0.016588800152142844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,2048,0.017569067080815633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,1024,0.012131200234095255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,65536,0.3524223963419596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,768,0.009904000163078307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,65536,0.3118037223815918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,16384,0.08941653569539389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,16384,0.08496320247650146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,12288,0.06744426886240641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,12288,0.06597119967142741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,10240,0.0565781315167745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,10240,0.05724373261133829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,8192,0.04602133433024089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,8192,0.04833386739095052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,512,0.008865066369374593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,256,0.0076223999261856076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,7168,0.04126826524734497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,7168,0.0437333345413208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,5120,128,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,6144,0.03538879950841268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,6144,0.03895999987920125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,5120,0.030009599526723225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,5120,0.03415253162384033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,10240,0.04467946688334147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,4096,0.02436586618423462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,65536,0.26453866958618166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,4096,0.03059733311335246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,12288,0.05272853374481201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,3584,0.021670399109522502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,3584,0.027998934189478557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,3072,0.019019732872645058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,16384,0.0697866678237915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,3072,0.026267733176549273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,2560,0.016540799538294473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,2560,0.024423466126124064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,7168,0.03318613370259603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,2048,0.01415786643822988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,2048,0.022715733448664347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,8192,0.036908801396687826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,1536,0.011509333054224651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,1536,0.02127359906832377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,6144,0.02937920093536377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,1024,0.009064533313115438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,1024,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,5120,0.025090134143829344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,768,0.0073642666141192125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,768,0.017746132612228394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,2560,0.015913599729537965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,4096,0.021471999088923135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,512,0.006078933179378509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,512,0.017340799172719322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,256,0.004966400067011515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,256,0.01625599960486094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,3584,0.020086399714152017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,128,0.004429866870244345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,128,0.016429866353670754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,2048,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,64,0.003903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,3072,0.017781333128611247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,64,0.015836800138155617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,4096,32,0.004199466605981191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,4096,32,0.015957333644231162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,65536,0.32615254720052084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,65536,0.28074986139933267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,1536,0.012429866194725036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,16384,0.08056853612263998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,16384,0.07658027013142904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,12288,0.060227199395497644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,12288,0.061596798896789554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,1024,0.010058666268984478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,10240,0.05069653193155924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,10240,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,256,0.007401599983374278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,128,0.007044266661008198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,8192,0.040106666088104245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,768,0.008982400099436443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,8192,0.04522346655527751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,7168,0.03646933237711589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,4096,512,0.008155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,7168,0.040668801466623945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,6144,0.031223465998967487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,6144,0.03705919981002807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,5120,0.026459733645121258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,5120,0.03264960050582886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,12288,0.053566932678222656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,4096,0.021606399615605672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,4096,0.028488532702128096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,16384,0.07114773591359456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,3584,0.019485867023468016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,65536,0.2744149208068848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,3584,0.027133866151173906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,3072,0.017143466075261436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,3072,0.025443200270334882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,6144,0.029365332921346028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,10240,0.045290664831797285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,2560,0.014870400230089823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,2560,0.023704532782236734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,2048,0.012820266683896384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,2048,0.02260479927062988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,7168,0.03337173461914063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,1536,0.010503466924031575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,1536,0.019742933909098308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,8192,0.03808426856994629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,1024,0.00811839997768402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,1024,0.01850773294766744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,5120,0.02576533357302348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,768,0.006877866884072621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,768,0.017798399925231932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,3072,0.017138133446375527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,512,0.005798399945100148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,512,0.016964266697565712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,4096,0.02119999925295512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,256,0.004622933268547058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,256,0.015898666779200234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,3584,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,128,0.004089600096146265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,128,0.015650133291880287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,1536,0.01160533328851064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,64,0.003868799904982249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,2560,0.015601066748301187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,64,0.01567039986451467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3584,32,0.003977599988381068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3584,32,0.015635200341542563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,65536,0.2804277420043945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,65536,0.2726784070332845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,2048,0.013531733552614847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,1024,0.009439999858538311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,16384,0.06981866359710694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,16384,0.07174826463063558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,12288,0.05155306657155355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,768,0.00823040008544922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,12288,0.05772053400675455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,10240,0.04326933224995931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,10240,0.05001493295033773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,512,0.007264000177383423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,256,0.006376533210277558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,8192,0.036136531829833986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,8192,0.04226346810658772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,7168,0.03168746630350749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,7168,0.03724266688028972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3584,128,0.0059445331494013464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,6144,0.02723306616147359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,6144,0.0342741330464681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,5120,0.023307732741038003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,5120,0.031014400720596313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,12288,0.04771093527475993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,4096,0.01907093326250712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,4096,0.02762453357378642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,65536,0.24307519594828286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,16384,0.06272960106531779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,3584,0.01732053359349569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,3584,0.025526400407155352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,3072,0.015093333522478738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,3072,0.02398080031077067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,10240,0.04049386580785115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,2560,0.013229866822560629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,6144,0.026103466749191284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,2560,0.022430932521820067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,2048,0.011387733618418376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,2048,0.020410666863123574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,8192,0.03417493502298991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,7168,0.02977173328399658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,1536,0.009629866480827332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,1536,0.019621332486470543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,1024,0.007187200089295705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,1024,0.01808639963467916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,5120,0.02295573353767395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,768,0.006187733511130015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,768,0.017413334051767985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,3072,0.015851733088493348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,4096,0.019556266069412232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,512,0.005322666466236114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,512,0.01690559983253479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,256,0.004456533491611481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,1536,0.010591999689737955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,256,0.015923200050989787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,128,0.003988266736268997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,128,0.015388799707094827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,3584,0.01816213329633077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,2560,0.014410666624704995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,64,0.003571200122435888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,64,0.01539413332939148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,3072,32,0.003803733239571253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,3072,32,0.015459199746449789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,65536,0.23047466278076173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,65536,0.24906880060831704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,2048,0.012283733487129212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,1024,0.00894719958305359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,16384,0.06445653438568115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,16384,0.06702400048573812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,12288,0.044498133659362796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,12288,0.05339306592941284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,10240,0.03700480063756307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,10240,0.045798401037851974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,768,0.007706666489442189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,8192,0.030856533845265703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,8192,0.03853226502736409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,256,0.006333866715431213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,7168,0.027438932657241823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,128,0.006072533130645752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,7168,0.03471893469492594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,3072,512,0.006996266543865204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,6144,0.02374826669692993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,6144,0.03189546664555867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,5120,0.020093866189320884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,5120,0.029546666145324706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,12288,0.04400746822357178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,4096,0.01637440025806427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,4096,0.026016000906626386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,65536,0.22573013305664064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,16384,0.05760000149408976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,3584,0.014961066842079162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,3584,0.024133332570393882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,3072,0.013293866316477457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,10240,0.0374570647875468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,3072,0.023066665728886923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,2560,0.01164906620979309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,2560,0.02179626623789469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,8192,0.03127040068308513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,2048,0.010103467106819152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,6144,0.02406826615333557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,2048,0.020309333006540933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,7168,0.027743999163309736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,1536,0.008627200126647949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,1536,0.018886399269104005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,5120,0.021248000860214233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,1024,0.006387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,1024,0.017749333381652833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,768,0.005751466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,768,0.01716266671816508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,3072,0.014740266402562461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,512,0.005009066561857859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,512,0.016539733608563742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,4096,0.01792959968249003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,256,0.0042357335488001505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,256,0.015707733233769734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,3584,0.016833066940307617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,2560,0.013313066959381104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,128,0.003735466549793879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,128,0.015024000406265258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,64,0.0035818666219711304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,64,0.015309866269429526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2560,32,0.0037280000746250153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2560,32,0.015352533260981242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,65536,0.18049813906351725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,1536,0.00981119970480601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,2048,0.011230933666229247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,65536,0.22388693491617837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,16384,0.05085866848627726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,16384,0.06781013011932373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,12288,0.03954026699066162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,12288,0.051831467946370446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,768,0.007339733342329661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,256,0.005868799984455109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,10240,0.03260800043741862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,512,0.006629333396752675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,10240,0.044343467553456625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,1024,0.008441600203514098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,8192,0.03145066698392232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,8192,0.038097067674001055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2560,128,0.005496533215045929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,7168,0.023909332354863484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,7168,0.033690667152404784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,6144,0.021356799205144248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,6144,0.030748800436655684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,5120,0.019179733594258626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,5120,0.02790293296178182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,16384,0.050537598133087155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,65536,0.23641066551208495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,4096,0.017076265811920167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,4096,0.02582506736119588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,3584,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,3584,0.024609067042668662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,12288,0.03765973250071208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,3072,0.012453333536783854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,3072,0.02257279952367147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,10240,0.03216639955838521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,2560,0.011122133334477742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,8192,0.027907200654347736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,2560,0.02061226765314738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,2048,0.009332266449928284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,7168,0.0240064005057017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,6144,0.021101866165796915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,2048,0.019462400674819948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,1536,0.007702399790287018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,1536,0.018212266763051353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,1024,0.0058037335673968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,1024,0.017259732882181803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,5120,0.018689066171646118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,768,0.005276800195376078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,768,0.016810667514801026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,4096,0.016035200158754984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,512,0.004746666550636292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,512,0.016437333822250367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,3072,0.013362133502960205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,256,0.003993600110212962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,256,0.015742933750152587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,3584,0.015547733505566916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,128,0.0035968000690142312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,128,0.015521066387494406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,2048,0.010368000467618306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,64,0.0033258666594823206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,64,0.015034666657447815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,768,0.00727893312772115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,2048,32,0.0034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,1024,0.007916800181070964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,2048,32,0.015035733580589294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,65536,0.14182933171590167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,2560,0.012418133020401002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,65536,0.19936960538228352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,16384,0.03939839998881022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,16384,0.06380906502405802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,12288,0.03118613362312317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,12288,0.0474453330039978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,1536,0.009524266918500264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,10240,0.029314132531483968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,10240,0.040088534355163574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,8192,0.026740266879399614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,512,0.006623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,8192,0.03547626733779907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,7168,0.020869332551956176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,7168,0.03178026676177979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,256,0.00606826643149058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,2048,128,0.005747200051943461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,6144,0.018833067019780478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,6144,0.029918932914733888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,5120,0.016291200121243795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,5120,0.026261333624521894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,12288,0.03748266696929932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,65536,0.20271892547607423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,4096,0.01456106702486674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,16384,0.04769386847813924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,4096,0.023924267292022704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,3584,0.012029866377512615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,3584,0.0226474662621816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,10240,0.03081600069999695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,3072,0.010718933741251628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,3072,0.022083199024200438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,2560,0.009902933239936828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,2560,0.019891200462977092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,8192,0.02611306707064311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,2048,0.00809386670589447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,2048,0.018323200941085815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,7168,0.023099732398986817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,1536,0.0065077334642410275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,1536,0.017941333850224814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,6144,0.020999467372894286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,1024,0.005447466671466827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,1024,0.017013333241144814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,5120,0.018199467658996583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,3072,0.011991467078526814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,768,0.0047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,4096,0.015198933084805808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,768,0.016353066762288412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,512,0.004148266712824504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,3584,0.014550399780273438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,512,0.01569919983545939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,256,0.0036576000352700555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,256,0.015192533532778421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,2560,0.011030399799346923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,128,0.0033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,128,0.014692266782124838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,64,0.003173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,1536,0.008602666854858398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,2048,0.009678933024406432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,64,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1536,32,0.0032426667710145317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1536,32,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,65536,0.09660373528798422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,16384,0.028841600815455122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,65536,0.1738368034362793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,1024,0.007490133245786031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,16384,0.05438079833984375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,12288,0.023014400402704874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,12288,0.04018666744232178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,10240,0.02085226575533549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,10240,0.03545173406600952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,256,0.005339733262856802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,768,0.006470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,512,0.0058442667126655575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,8192,0.020706133047739664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1536,128,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,8192,0.03139839967091878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,7168,0.014844800035158793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,7168,0.028268800179163618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,6144,0.013379200299580892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,6144,0.02665173411369324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,5120,0.011991467078526814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,5120,0.023923200368881226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,65536,0.18643840154012042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,4096,0.011150933305422465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,4096,0.021792000532150267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,16384,0.041365333398183185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,3584,0.00918826659520467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,12288,0.03177066644032796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,3584,0.02085226575533549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,3072,0.008141866823037466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,3072,0.019734400510787963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,10240,0.027100799481074016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,2560,0.007423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,2560,0.018911999464035035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,8192,0.02257173260052999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,7168,0.02048426667849223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,2048,0.006515199939409892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,2048,0.01849386692047119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,6144,0.018080000082651773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,1536,0.005694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,1536,0.01717546582221985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,1024,0.004796800017356872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,5120,0.015607466300328573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,1024,0.01662506659825643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,768,0.00448639988899231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,768,0.01632213294506073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,4096,0.01332373321056366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,512,0.003946666667858759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,512,0.01574720044930776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,3072,0.010985599954922994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,256,0.0035189333061377203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,3584,0.013004799683888754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,256,0.01529813309510549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,128,0.0033429334561030067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,128,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,2560,0.010337066650390626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,2048,0.008749866485595703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,64,0.00306986669699351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,64,0.014808533589045205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,1024,0.006950399776299794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,1024,32,0.003222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,1024,32,0.014843733112017313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,1536,0.008354133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,65536,0.07550079822540283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,65536,0.16314560572306316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,16384,0.023107200860977173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,16384,0.05064853429794312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,512,0.005922133227189382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,12288,0.01842986742655436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,12288,0.03922559817632039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,10240,0.01795413295427958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,256,0.005492266515890757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,10240,0.034499200185139973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,8192,0.01725013256072998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,8192,0.030268800258636475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,7168,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,7168,0.02792106668154399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,128,0.0051360001166661584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,6144,0.013107200463612875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,6144,0.02582719922065735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,1024,768,0.006296533346176148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,5120,0.011720533172289532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,5120,0.02358506719271342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,65536,0.1723669370015462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,4096,0.01011306643486023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,12288,0.032503465811411544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,16384,0.043220265706380205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,4096,0.022094933191935222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,3584,0.009325866897900898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,3584,0.0211136003335317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,3072,0.008098133405049642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,10240,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,3072,0.02059733271598816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,2560,0.0073525334397951765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,2560,0.01928960084915161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,8192,0.023970133066177367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,7168,0.021485867102940877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,2048,0.006340266764163971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,6144,0.0188426673412323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,2048,0.018372267484664917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,1536,0.005639466643333435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,1536,0.017654399077097573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,1024,0.004783999919891357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,1024,0.01681813398996989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,5120,0.016035200158754984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,768,0.004329599936803182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,768,0.01604159971078237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,4096,0.013522133231163025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,512,0.00384853333234787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,3072,0.010986666878064473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,512,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,3584,0.012916266918182373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,256,0.0034261333445707956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,256,0.015241600076357522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,128,0.0031061333914597826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,2560,0.010340266426404317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,1536,0.00810346653064092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,128,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,64,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,64,0.014958932995796204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,768,32,0.003018666555484136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,2048,0.008994133273760477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,1024,0.0070698668559392285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,768,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,65536,0.051957333087921144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,65536,0.15262187321980794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,16384,0.018631466229756675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,16384,0.045679998397827146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,12288,0.014710399508476257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,12288,0.03627520004908244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,10240,0.01360426644484202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,10240,0.03183253407478333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,768,0.006215466558933258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,8192,0.01225493351618449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,8192,0.027524266640345258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,256,0.005305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,512,0.005772800246874491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,768,128,0.0050357331832249965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,7168,0.011338667074839274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,7168,0.027449599901835126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,6144,0.010268800457318624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,6144,0.024664533138275147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,5120,0.009458133578300476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,5120,0.023494400580724082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,65536,0.17051413853963215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,4096,0.008809600273768108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,16384,0.03857280015945434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,4096,0.0218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,3584,0.008148266871770223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,12288,0.029410133759180706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,3584,0.021178666750590006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,3072,0.007250133156776428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,3072,0.01957119901974996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,10240,0.025501867135365803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,2560,0.00728000005086263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,8192,0.02111466725667318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,2560,0.018908800681432088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,7168,0.018615466356277467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,2048,0.0064640000462532045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,2048,0.018127999703089395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,6144,0.016473600268363954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,1536,0.005605333546797434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,1536,0.01755946675936381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,5120,0.014333867033322654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,1024,0.0047765334447224935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,1024,0.016771199305852254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,768,0.004206933577855428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,768,0.016266666849454246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,4096,0.012526933352152506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,512,0.0038005332152048744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,512,0.015658666690190635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,3072,0.010481066505114238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,256,0.003352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,3584,0.012114133437474568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,256,0.015517866611480713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,128,0.0031295999884605407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,128,0.01474240024884542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,2560,0.009783466657002766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,64,0.0029525332152843474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,64,0.014472533265749613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,512,32,0.0030080000559488933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,512,32,0.014518400033315023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,2048,0.008412800232569377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,65536,0.03517866532007853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,65536,0.14443626403808593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,16384,0.012226133545239767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,16384,0.03877973159154256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,1024,0.00681386689345042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,1536,0.00798826664686203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,16384,0.036830933888753255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,12288,0.00993066628774007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,12288,0.030487465858459472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,768,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,10240,0.009239466985066731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,10240,0.02839573423067729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,8192,0.00928000013033549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,8192,0.025854933261871337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,512,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,7168,0.008843732873598735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,256,0.005133866767088572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,7168,0.02504533330599467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,512,128,0.004891733328501383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,6144,0.008126933375994365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,6144,0.02360853354136149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,5120,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,5120,0.022959999243418374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,4096,0.007150933146476746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,4096,0.020811732610066733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,65536,0.1425909360249837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,3584,0.007039999961853028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,3584,0.02059626579284668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,12288,0.028197334210077925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,3072,0.006592000027497609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,10240,0.024308266242345174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,3072,0.019130667050679527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,2560,0.006424533327420552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,8192,0.020165334145228066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,2560,0.01830079952875773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,2048,0.005748266478379568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,7168,0.018172800540924072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,2048,0.017785600821177163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,6144,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,1536,0.0050335998336474095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,1536,0.01735573410987854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,1024,0.004294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,5120,0.014055466651916504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,1024,0.016639999548594155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,768,0.003945599993069967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,768,0.016106667121251424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,4096,0.012300800283749897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,512,0.0035616000493367515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,512,0.015225600202878317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,3584,0.011875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,256,0.003230933348337809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,256,0.014859732985496522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,3072,0.010170666376749675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,128,0.002930133293072383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,128,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,2560,0.009583999713261921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,64,0.0028021333118279776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,2048,0.008273066580295562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,64,0.014822399616241455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,256,32,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,1536,0.007867733140786488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,256,32,0.014422399799029031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,65536,0.026563199361165364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,1024,0.0066538666685422255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,65536,0.13897493680318196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,16384,0.009586133559544881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,16384,0.03588266770044963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,768,0.005747200051943461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,12288,0.008374399940172831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,12288,0.03030293385187785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,10240,0.0077674667040507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,10240,0.02772480050722758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,512,0.005419733126958212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,8192,0.008076799909273784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,8192,0.02526293396949768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,256,0.004875733455022176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,7168,0.0077674667040507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,7168,0.024714666604995727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,256,128,0.0047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,6144,0.007181866466999054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,6144,0.02443839907646179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,65536,0.13709972699483236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,16384,0.03626453479131063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,5120,0.006783999999364217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,12288,0.028061866760253906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,5120,0.022921599944432578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,4096,0.006380799909432728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,4096,0.02103253404299418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,10240,0.024209066232045492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,3584,0.006741333504517872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,3584,0.01948480010032654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,3072,0.006431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,3072,0.019429334004720054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,8192,0.02020906607309977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,2560,0.006222933530807495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,2560,0.018133334318796792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,2048,0.005646933118502299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,7168,0.018150399128595986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,2048,0.01735573410987854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,1536,0.004936533172925314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,1536,0.017112533251444496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,6144,0.016124799847602844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,1024,0.00417493333419164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,1024,0.016378666957219443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,4096,0.012119467059771221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,768,0.00401706670721372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,3584,0.011858133474985759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,768,0.015927466750144958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,5120,0.014152533809343972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,512,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,512,0.015453867117563882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,256,0.003180799881617228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,3072,0.010081066687901815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,256,0.015086932977040609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,128,0.0030559999247392017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,2560,0.009398399790128072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,128,0.014632532993952433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,2048,0.008189866443475087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,64,0.0028437333802382152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,64,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,128,32,0.00297173336148262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,128,32,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,1536,0.00782719999551773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,65536,0.024040534098943075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,16384,0.008354133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,65536,0.13722453117370606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,16384,0.03574613332748413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,12288,0.0073173334201176955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,12288,0.029734400908152263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,1024,0.006691200037797292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,10240,0.0069461335738499955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,10240,0.02843093276023865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,8192,0.00655680000782013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,8192,0.025821866591771443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,7168,0.0064064001043637585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,768,0.005950933198134104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,7168,0.024616533517837526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,6144,0.006159999966621399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,512,0.005496533215045929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,6144,0.023962666591008507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,5120,0.006617600222428639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,4096,0.006072533130645752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,5120,0.022670932610829673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,4096,0.02068480054537455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,3072,0.018681599696477254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,3584,0.006684799989064534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,256,0.0050122668345769245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,3584,0.01997440059979757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,3072,0.00629013329744339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,2560,0.006186666587988535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,512,128,128,0.004756266872088114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,2560,0.018454400698343913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,2048,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,2048,0.018253866831461588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,1536,0.004694400231043497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,1536,0.016966400543848674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,1024,0.004036266605059306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,1024,0.01614293356736501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,768,0.0036757332583268487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,768,0.01565120021502177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,512,0.0034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,512,0.015170133113861084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,256,0.0029866665601730345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,256,0.01479680041472117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,128,0.0028575999041398365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,128,0.014633599917093912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,16384,0.007276799778143566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,64,0.0026591998835404714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,64,0.014376533031463624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,64,32,0.002701866626739502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,64,32,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,65536,0.02212160031000773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,65536,0.13650879859924317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,16384,0.03476373354593913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,12288,0.006487466891606649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,12288,0.029425066709518433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,10240,0.006472533444563548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,10240,0.027748266855875652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,8192,0.006422399977842967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,8192,0.025306665897369386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,7168,0.006555733581384023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,6144,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,7168,0.024727465709050496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,6144,0.024112000068028768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,5120,0.006541866560777028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,5120,0.022553600867589316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,4096,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,3072,0.018786134322484334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,4096,0.021338667472203574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,3584,0.006444799900054932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,3584,0.019986132780710854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,2048,0.017326933145523072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,3072,0.0061482667922973635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,2560,0.006084266801675161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,2048,0.005470933516820272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,2560,0.018821332852045694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,1536,0.00476800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,1024,0.0040618665516376495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,1536,0.016919465859731038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,1024,0.016269866625467935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,768,0.0039061332742373147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,768,0.01583466629187266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,512,0.003323733309904734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,512,0.015107199549674988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,256,0.003102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,256,0.014938666423161825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,128,0.0027797333896160126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,128,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,64,0.002644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,64,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,512,32,32,0.002632533262173335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,512,32,32,0.014442666371663412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,16384,0.5952309290568034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,12288,0.44715092976888016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,12288,0.5388522466023763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,10240,0.3744757334391276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,16384,1.1907093048095703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,12288,0.8424352010091146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,10240,0.6899669647216797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,8192,0.32060159047444664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,8192,0.5492160161336263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,7168,0.4713600158691406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,7168,0.2745727856953939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,7168,0.3241888046264648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,6144,0.41717227300008136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,6144,0.2417311986287435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,5120,0.3504159927368164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,5120,0.20255467096964516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,5120,0.23361706733703613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,4096,0.27589438756306967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,4096,0.17045547167460123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,3584,0.24836373329162598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,3584,0.1501471996307373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,3584,0.17072854042053223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,3072,0.21454933484395347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,3072,0.13178986708323162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,16384,0.7352234522501628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,2560,0.11497706572214764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,2560,0.18078932762145997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,2048,0.14453760782877606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,2048,0.09768853187561036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,8192,0.35831146240234374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,1536,0.1137994686762492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,1536,0.08160746892293294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,6144,0.2729311943054199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,1024,0.07895680268605551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,1024,0.06165866851806641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,768,0.0619050661722819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,4096,0.19172266324361165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,768,0.053758935133616126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,512,0.04625706672668457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,10240,0.44567359288533526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,3072,0.14820159276326497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,512,0.047777068614959714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,256,0.03262719909350077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,256,0.03903253475824992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,256,0.04329493443171183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,2560,0.1269610643386841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,128,0.025214932362238568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,128,0.0350325345993042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,128,0.04012800057729085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,64,0.02213866710662842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,64,0.034780800342559814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,2048,0.10636906623840332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,65536,32,0.02179946700731913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,65536,32,0.035345065593719485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,1536,0.08756373723347982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,16384,0.26991360982259116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,16384,0.1705738703409831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,1024,0.0679690678914388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,65536,0.6632191975911458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,65536,1.1433024088541666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,12288,0.20854506492614747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,12288,0.13082666397094728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,768,0.061051734288533534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,10240,0.17435946464538574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,10240,0.11172586282094318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,10240,0.11710933049519856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,8192,0.1403146743774414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,65536,512,0.05070720116297404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,8192,0.0927903970082601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,7168,0.12337493101755778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,7168,0.08300267060597738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,6144,0.10632533232371014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,6144,0.07326080004374186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,5120,0.08819413185119629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,5120,0.06353919903437297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,16384,0.17853439648946126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,65536,0.7659456253051757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,4096,0.07038933436075846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,12288,0.1394378662109375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,4096,0.06327999830245971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,3584,0.0627349336942037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,3584,0.06158933242162069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,3072,0.05526400009791056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,8192,0.0959882656733195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,3072,0.04766720136006673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,7168,0.08304213682810466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,2560,0.04667093356450398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,2560,0.041169067223866776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,2048,0.03826560179392497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,2048,0.035588268438975015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,1536,0.0296725332736969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,1536,0.031402667363484696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,5120,0.06104000012079874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,6144,0.07394560178120932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,1536,0.026174932718276978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,1024,0.021101866165796915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,1024,0.0264789342880249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,768,0.017211733261744182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,768,0.024498132864634196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,512,0.013318399588267008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,4096,0.04954559803009033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,512,0.02249600092569987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,256,0.00957973301410675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,3584,0.04497919877370198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,256,0.01955946683883667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,128,0.007202133536338806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,128,0.018326399723688762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,3072,0.04015466769536336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,128,0.01259519954522451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,64,0.006190933287143707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,2560,0.034381866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,64,0.01895786722501119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,16384,32,0.0064522668719291685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,16384,32,0.01909760038057963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,2048,0.031155200799306233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,16384,0.19989760716756183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,65536,0.4980319976806641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,1024,0.020994132757186888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,65536,0.8175274531046549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,16384,0.1317792018254598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,768,0.018874667088190713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,12288,0.15122453371683758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,12288,0.10958080291748047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,10240,0.1248960018157959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,512,0.01602133313814799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,10240,0.08848533630371094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,8192,0.10088533560434979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,8192,0.0732479969660441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,16384,256,0.013923199971516928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,7168,0.08878506819407145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,7168,0.06597653230031332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,6144,0.07665173212687174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,6144,0.05891199906667074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,5120,0.06347306569417319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,5120,0.05151893297831217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,12288,0.11967573165893555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,4096,0.052692266305287686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,4096,0.04478933413823445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,65536,0.6114218393961589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,16384,0.15796906153361004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,6144,0.06155519882837931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,3584,0.047635201613108316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,3584,0.04144959847132365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,10240,0.10130133628845214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,3072,0.04026240110397339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,3072,0.037649067242940266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,2560,0.03417386611302693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,7168,0.0707594633102417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,2560,0.03397866487503052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,2048,0.028152533372243244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,2048,0.030562132596969604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,1536,0.022503467400868733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,8192,0.08359999656677246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,1536,0.02719786763191223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,1024,0.016768000523249307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,1024,0.023950932423273723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,5120,0.05213760137557984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,768,0.01376426617304484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,768,0.022344533602396646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,3072,0.033885868390401204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,4096,0.04323519865671794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,512,0.01099626620610555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,512,0.02034133275349935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,1536,0.02093120018641154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,256,0.007825066645940144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,256,0.0182805339495341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,2560,0.030129067103068036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,128,0.006296533346176148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,3584,0.039074134826660153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,128,0.01760960022608439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,64,0.005470933516820272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,64,0.017476266622543334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,12288,32,0.006041599810123444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,2048,0.0265610675017039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,12288,32,0.017913599809010826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,768,0.014995200435320535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,65536,0.4429087956746419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,65536,0.675707753499349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,16384,0.173961607615153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,65536,0.5147466659545898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,16384,0.11944426695505779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,12288,0.13202880223592123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,12288,0.09432319800059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,10240,0.11037973562876384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,10240,0.08024106820424398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,8192,0.08758719762166342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,8192,0.06746880213419595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,128,0.009637332955996196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,256,0.010692266623179118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,7168,0.07815360228220622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,512,0.012823466459910074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,7168,0.06068160136540731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,12288,1024,0.017512534062067667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,16384,0.1339296023050944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,6144,0.06754986445109049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,6144,0.05423680146535238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,12288,0.09931200345357259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,5120,0.05626773436864217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,10240,0.08579413096110025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,4096,0.04594026803970337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,5120,0.04841173489888509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,4096,0.04269013404846191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,3584,0.0404309352238973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,3584,0.03851519823074341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,8192,0.06878933111826578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,3072,0.03528000116348266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,3072,0.035436801115671795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,2560,0.030103466908137005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,2560,0.03208319942156474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,2560,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,2048,0.025166932741800947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,2048,0.02872213323911031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,1536,0.01986560026804606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,1536,0.025860265890757246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,7168,0.06084800163904826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,1024,0.014929067095120749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,6144,0.05305279890696207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,1024,0.02318506638209025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,768,0.012597333391507468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,5120,0.04617066780726115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,768,0.021530665953954062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,512,0.010315733154614766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,4096,0.03718080123265584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,512,0.01867413322130839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,3584,0.03364693323771159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,256,0.007259733478228251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,3072,0.028875732421875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,256,0.017644800742467246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,128,0.00591786652803421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,128,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,64,0.005677866439024607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,2048,0.02304319938023885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,64,0.01712533235549927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,10240,32,0.005765333275000254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,10240,32,0.017044266064961754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,1536,0.018388267358144125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,65536,0.5452885309855143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,65536,0.36717440287272135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,1024,0.015590399503707886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,16384,0.1402773380279541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,16384,0.1002773364384969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,768,0.01344533363978068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,12288,0.10501866340637207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,12288,0.07942826747894287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,10240,0.08817066351572672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,512,0.011166933178901672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,10240,0.0678933302561442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,8192,0.0707914670308431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,256,0.00970240036646525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,8192,0.05752533276875814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,10240,128,0.00867306689421336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,7168,0.061742933591206875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,7168,0.05198186635971069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,6144,0.05324906508127848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,6144,0.04683733383814494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,5120,0.04465706745783488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,5120,0.04152746597925822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,12288,0.0827946662902832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,4096,0.0368938684463501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,4096,0.035702399412790936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,65536,0.41677440007527666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,16384,0.1086197296778361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,3584,0.03258986671765645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,6144,0.04276373386383057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,3584,0.03294720053672791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,3072,0.02840426762898763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,10240,0.06946559747060141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,3072,0.030437332391738892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,2560,0.024290132522583007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,7168,0.048506665229797366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,2560,0.028510934114456175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,2048,0.020088533560434975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,8192,0.05717653433481852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,2048,0.025836799542109174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,1536,0.015870933731396995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,1536,0.023679999510447185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,1024,0.012124799688657125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,1024,0.0208512008190155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,5120,0.03722879886627197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,768,0.010477866729100544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,768,0.01958720088005066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,3072,0.024626133839289348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,512,0.008201600114504496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,4096,0.030487465858459472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,512,0.01831573247909546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,256,0.006066133578618368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,256,0.017335466543833413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,1536,0.01602666676044464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,128,0.0052042668064435325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,2560,0.022165334224700926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,128,0.016268799702326454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,64,0.004642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,64,0.01646080017089844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,8192,32,0.005108266572157542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,2048,0.019819732507069907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,8192,32,0.01664746701717377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,768,0.011542399724324543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,3584,0.027944533030192058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,65536,0.4734389305114746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,16384,0.119540270169576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,65536,0.33398825327555337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,1024,0.013666133085886637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,16384,0.09923946857452393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,12288,0.09184853235880533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,12288,0.07248319784800211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,10240,0.07711253166198731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,10240,0.06320319970448812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,8192,0.0624725341796875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,8192,0.05358293453852335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,256,0.008669867118199667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,128,0.008205866813659668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,7168,0.05452906688054403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,8192,512,0.00978773335615794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,7168,0.04889280001322428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,6144,0.04660160144170125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,6144,0.043866666158040364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,5120,0.03901760180791219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,5120,0.03880746761957805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,12288,0.07888853549957275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,4096,0.03269866704940796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,4096,0.03408426841100057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,65536,0.3997152010599772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,3584,0.03433493375778198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,16384,0.1035914659500122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,3584,0.033956265449523924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,3072,0.025727999210357667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,10240,0.06658879915873209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,3072,0.03018239935239156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,2560,0.022006400426228843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,2560,0.027731200059254963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,8192,0.055581867694854736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,6144,0.04078720013300578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,2048,0.018382932742436728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,2048,0.02527359922726949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,1536,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,1536,0.023110399643580117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,7168,0.047414398193359374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,1024,0.011720533172289532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,1024,0.020677334070205687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,5120,0.03614720106124878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,768,0.0100874662399292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,768,0.01883626580238342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,3072,0.02371946573257446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,4096,0.02974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,512,0.007987200220425924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,512,0.018683733542760213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,256,0.006260266900062561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,256,0.017846399545669557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,2560,0.021626667181650797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,3584,0.027523199717203777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,128,0.005510400235652924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,1536,0.015729066729545594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,128,0.016760534048080443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,64,0.004916266600290934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,64,0.016860800981521606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,7168,32,0.005243733525276184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,7168,32,0.01716586748758952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,65536,0.4020832061767578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,65536,0.3098047892252604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,2048,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,1024,0.013136000434557597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,16384,0.10344959894816082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,16384,0.08331519762674967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,12288,0.07905920346577963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,768,0.011084799965222675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,12288,0.0667306661605835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,10240,0.06622080008188883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,10240,0.057650132973988855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,256,0.008274133503437042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,8192,0.05360320011774698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,512,0.009644800424575805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,8192,0.04899199803670247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,7168,0.04710826476414998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,7168,128,0.0077802668015162155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,7168,0.04448106686274211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,6144,0.040218667189280195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,6144,0.03981013298034668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,5120,0.034518400828043624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,5120,0.035282135009765625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,12288,0.06872639656066895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,65536,0.38482348124186194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,4096,0.02765973409016927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,16384,0.09344639778137206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,4096,0.03237226605415344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,3584,0.02485333283742269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,3584,0.029308799902598066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,3072,0.021663999557495116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,3072,0.027406932910283406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,10240,0.05955093304316202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,2560,0.01854506731033325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,2560,0.025498666365941364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,6144,0.036817065874735516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,2048,0.015738667050997416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,7168,0.04295573234558105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,2048,0.023784534136454264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,1536,0.012969600160916648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,1536,0.021667200326919555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,8192,0.049370666344960526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,1024,0.009981866677602131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,1024,0.018897066513697304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,5120,0.03277013301849365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,768,0.008646399776140849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,768,0.017915733655293784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,3072,0.021735467513402305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,4096,0.02706986665725708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,512,0.006902400155862172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,512,0.017340799172719322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,256,0.005407999952634176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,256,0.01695573329925537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,3584,0.024642133712768556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,128,0.004579199850559235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,2560,0.019637332359949747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,128,0.015846400459607442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,64,0.004068266600370407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,64,0.016056533654530844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,1536,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,6144,32,0.004384000102678935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,6144,32,0.016243199507395424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,65536,0.35332374572753905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,65536,0.2816213289896647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,2048,0.017254400253295898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,16384,0.08871466318766276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,16384,0.07611733277638753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,1024,0.012139733632405598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,12288,0.06871786912282309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,768,0.010163199901580811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,12288,0.06047466595967611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,12288,0.05481173197428385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,10240,0.057460268338521324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,10240,0.053142400582631436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,8192,0.04649279912312825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,8192,0.045558400948842365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,8192,0.038991999626159665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,7168,0.04119360049565633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,7168,0.04124373197555542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,6144,0.035740800698598224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,6144,0.037539199988047285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,256,0.007948799928029378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,512,0.008888533711433411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,5120,0.029487999280293782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,5120,0.03276159962018331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,5120,0.026492800315221148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,6144,128,0.0073173334201176955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,4096,0.02441493272781372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,65536,0.2789205233256022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,4096,0.02969706654548645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,16384,0.07189119656880696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,3584,0.02164906660715739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,3584,0.027695999542872114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,3072,0.019091200828552247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,3072,0.027027199665705364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,10240,0.046418134371439615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,2560,0.016541866461435954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,2560,0.02401813268661499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,2048,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,7168,0.03408533334732056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,2048,0.022775467236836752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,1536,0.01167039970556895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,1536,0.02082560062408447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,1024,0.009264000256856282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,6144,0.02967039942741394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,1024,0.018104533354441323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,1024,0.009841066598892213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,768,0.007843199868996937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,768,0.01770346760749817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,4096,0.02189013361930847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,512,0.006099199752012888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,3584,0.020089600483576456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,512,0.017195733388264973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,256,0.0052373334765434265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,3072,0.01798400084177653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,256,0.016214399536450704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,2560,0.01616320013999939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,128,0.004356266558170318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,128,0.015622400244077048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,2048,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,64,0.0041802664597829185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,1536,0.012158933281898498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,5120,32,0.004297600189844767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,64,0.015890133380889893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,5120,32,0.01583573321501414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,65536,0.2760042508443197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,65536,0.24090879758199057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,65536,0.2317567984263102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,16384,0.07787520090738932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,16384,0.07892586390177408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,12288,0.055889066060384116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,768,0.00865600009759267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,12288,0.05700159867604574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,10240,0.04763946533203125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,10240,0.051183998584747314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,8192,0.04568426609039307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,512,0.007739733159542084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,8192,0.043689600626627606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,8192,0.03220906654993693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,7168,0.03458026647567749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,7168,0.03886293172836304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,6144,0.030296534299850464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,256,0.006903466582298279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,6144,0.03582293192545573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,6144,0.025145600239435833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,5120,0.02592960000038147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,5120,0.029402667284011842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,5120,128,0.006409599880377452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,4096,0.023115734259287514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,4096,0.026495999097824095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,3584,0.019189333915710448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,3584,0.024910932779312132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,3072,0.01690346598625183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,3072,0.023933867613474526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,16384,0.059906133015950526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,12288,0.04494400024414062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,2560,0.013491200407346091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,2560,0.022987733284632363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,10240,0.0384117325146993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,2048,0.011506133278210958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,2048,0.02104746699333191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,1536,0.009768533706665038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,1536,0.019433599710464478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,7168,0.0281333327293396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,1024,0.007825066645940144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,5120,0.02216213345527649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,1024,0.018081067005793254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,768,0.006322133541107178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,4096,0.018318933248519898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,768,0.017468800147374473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,3584,0.017091200749079386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,512,0.005295999844868978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,512,0.016428800423940022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,3072,0.01534293293952942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,256,0.004403199752171834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,256,0.015684266885121666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,256,0.006477866570154827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,128,0.003958400090535482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,128,0.015267200271288552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,128,0.006050133208433787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,64,0.003517866631348928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,2560,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,64,0.01537493367989858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,4096,32,0.003836799909671148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,2048,0.012880000472068786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,4096,32,0.015539200107256571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,65536,0.24793492952982582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,1536,0.011005866527557372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,16384,0.06859947045644124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,65536,0.2264618714650472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,16384,0.06540373166402182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,12288,0.0535701314608256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,1024,0.008503466844558716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,12288,0.052444799741109216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,10240,0.04337066809336344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,10240,0.046012798945109054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,768,0.007754666606585185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,8192,0.035145600636800126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,10240,0.037172265847524005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,8192,0.039450665314992264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,7168,0.03100693424542745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,4096,512,0.0069909334182739254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,7168,0.03616213401158651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,6144,0.027221333980560303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,6144,0.032357333103815715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,5120,0.022651733954747517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,5120,0.030822400252024335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,65536,0.22078612645467124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,16384,0.05807786782582601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,4096,0.018717867136001588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,4096,0.027219200134277345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,12288,0.04413226842880249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,3584,0.01695573329925537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,3584,0.026044799884160356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,3072,0.015084800124168397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,8192,0.0318997323513031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,3072,0.02490239938100179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,7168,0.02762666742006938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,2560,0.01464959979057312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,2560,0.022733867168426514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,6144,0.024412800868352257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,2048,0.01262079974015554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,2048,0.021112533410390218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,1536,0.010126933455467224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,1536,0.019029333194096883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,5120,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,1024,0.008137600123882293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,1024,0.01841813325881958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,768,0.006467199822266896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,768,0.017386666933695474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,4096,0.018367999792099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,512,0.005635199944178263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,3584,0.01708266735076904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,512,0.016574933131535848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,256,0.004553600152333578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,3072,0.015196800231933594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,256,0.015867732961972556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,2560,0.013772799571355184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,128,0.004092800120512644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,128,0.015763200322786965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,2048,0.01275200049082438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,64,0.0035797332723935447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,64,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3584,32,0.0037439999481042228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3584,32,0.01548479994138082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,65536,0.20926292737325033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,1536,0.010104533036549885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,65536,0.20724587440490722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,1024,0.00843946635723114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,16384,0.0589194655418396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,16384,0.05998080174128214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,12288,0.04637120167414348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,768,0.007766399780909221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,12288,0.048654933770497635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,10240,0.03664960066477458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,10240,0.04266026814778646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,8192,0.029713066418965657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,512,0.007009066641330719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,8192,0.035894401868184406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,7168,0.026348799467086792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,256,0.006347733239332835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,7168,0.032681600252787275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,6144,0.02313279906908671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3584,128,0.005834666887919108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,6144,0.030113067229588824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,5120,0.019707733392715455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,5120,0.02930240035057068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,65536,0.20523519515991212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,4096,0.01660053332646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,16384,0.0527456005414327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,4096,0.02612053354581197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,12288,0.03994133472442627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,3584,0.01490133305390676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,10240,0.03379093408584595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,3584,0.024753065903981526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,3072,0.013209600249926248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,8192,0.028776532411575316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,3072,0.023364265759785972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,2560,0.013637333114941915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,2560,0.02221440076828003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,2560,0.012871467073758445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,7168,0.02514773408571879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,2048,0.011424000064531963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,2048,0.02077440023422241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,1536,0.009290666381518046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,6144,0.022729599475860597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,1536,0.018886399269104005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,1024,0.0076906666159629825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,1024,0.017538134256998697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,5120,0.020006400346755982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,768,0.006217599908510844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,768,0.017093332608540852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,512,0.005095466474692027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,512,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,4096,0.016939733425776163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,256,0.004071466624736786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,3584,0.015702399611473083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,256,0.015889066457748412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,3072,0.014180266857147216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,128,0.003623466690381368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,128,0.015118933717409768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,64,0.003419733295838038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,64,0.015355733036994935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,2048,0.011678933103879293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,3072,32,0.0035573333501815797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,1536,0.009604266285896302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,3072,32,0.015441067020098367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,1024,0.008051200211048127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,65536,0.18062079747517903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,65536,0.19247040748596192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,768,0.007417599856853485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,16384,0.05088853438695272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,16384,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,512,0.006762666503588359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,12288,0.03802560170491536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,12288,0.04614400068918864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,10240,0.03241706689198812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,10240,0.041010133425394696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,8192,0.0262442668279012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,256,0.006086400151252747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,8192,0.033979733784993485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,7168,0.023436800638834635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,3072,128,0.0056874667604764305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,7168,0.03075733383496602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,6144,0.020544000466664634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,6144,0.028305067618687944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,5120,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,5120,0.025656533241271973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,65536,0.20143465995788573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,16384,0.04506133397420247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,4096,0.014362667004267374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,12288,0.03448959986368815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,4096,0.0233952005704244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,3584,0.01285653313000997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,3584,0.02249706586201986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,10240,0.029618134101231892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,3072,0.011403733491897583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,8192,0.024800000588099162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,3072,0.021861332654953002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,2560,0.010292266805966694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,2560,0.0215445339679718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,2048,0.008916266759236654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,7168,0.022105600436528525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,2048,0.019054933389027914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,1536,0.007786666850248973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,1536,0.01841920018196106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,6144,0.020071466763814293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,1024,0.005975466469923655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,1024,0.017199999094009398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,5120,0.017717333634694417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,768,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,768,0.016812799374262492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,4096,0.015277866522471109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,512,0.004706133405367533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,512,0.016033066312472026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,3072,0.013019733627637229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,256,0.004035199930270513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,256,0.015577600399653117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,2048,0.010294399658838908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,3584,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,128,0.003681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,128,0.01514346698919932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,64,0.003370666752258936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,2560,0.011901866396268208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,64,0.015075199802716575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2560,32,0.0034944000343481696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2560,32,0.014855466286341348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,1024,0.007482666770617168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,65536,0.14254825909932453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,65536,0.17017919222513836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,16384,0.04360746542612712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,768,0.007179733117421467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,16384,0.055538133780161536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,12288,0.03287360072135925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,12288,0.046242133776346846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,1536,0.009040000041325887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,10240,0.027820799748102826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,10240,0.040807465712229415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,8192,0.03024959961573283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,8192,0.03460799853006999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,256,0.006133333345254262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,512,0.0067669332027435304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,7168,0.025634133815765382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,7168,0.03126399914423625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2560,128,0.005764266848564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,6144,0.020858667294184365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,6144,0.02917120059331258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,5120,0.01853546698888143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,5120,0.025059199333190917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,12288,0.03379733165105184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,16384,0.044556800524393717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,4096,0.015541332960128783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,65536,0.1896010716756185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,4096,0.021993599335352578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,3584,0.01418560047944387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,3584,0.021016534169514975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,3072,0.01209279994169871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,10240,0.0290613333384196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,3072,0.019377066691716512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,2560,0.00876586635907491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,2560,0.019884800910949706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,8192,0.024503467480341594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,2048,0.007626666625340779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,2048,0.018653867642084758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,6144,0.019444266955057778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,7168,0.021574399868647256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,1536,0.006539733211199443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,1536,0.018053332964579262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,5120,0.017343999942143758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,1024,0.005354666709899902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,1024,0.016961065928141277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,768,0.004753066599369049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,3072,0.0121781329313914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,768,0.016555733482042947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,4096,0.01480959951877594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,512,0.0042026668787002565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,512,0.01576533317565918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,256,0.003736533224582672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,3584,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,256,0.015405866503715514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,2560,0.010636799534161885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,128,0.0033973333736260734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,128,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,64,0.0032511999209721885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,64,0.014942933122316995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,2048,32,0.00345920001467069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,2048,32,0.01507306694984436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,65536,0.11017706394195556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,65536,0.15341332753499348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,1536,0.008237866560618083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,16384,0.03323946595191955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,2048,0.009816533327102661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,16384,0.05014079809188843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,12288,0.027192533016204834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,12288,0.03821226755777995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,1024,0.007106133302052816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,768,0.006632533172766368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,10240,0.023538132508595787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,256,0.005435733497142792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,512,0.005989333490530649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,10240,0.03431040048599243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,8192,0.019834667444229126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,2048,128,0.0051807999610900875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,8192,0.030140799283981324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,7168,0.01763733426729838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,7168,0.028099199136098225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,6144,0.01567893326282501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,6144,0.025779199600219727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,5120,0.013748266299565635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,5120,0.02534826596577962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,65536,0.16377065976460775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,4096,0.011678933103879293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,12288,0.03243199984232585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,4096,0.02281173268953959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,16384,0.043084800243377686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,3584,0.010632533828417461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,3584,0.02141439914703369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,3072,0.009692800045013428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,3072,0.020097066958745323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,10240,0.027610667546590167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,2560,0.009103999535242716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,2560,0.019526400168736777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,6144,0.018759467204411826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,8192,0.02375040054321289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,7168,0.021041067441304524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,2048,0.007854933540026348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,2048,0.018782933553059898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,1536,0.006444799900054932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,1536,0.01768640081087748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,1024,0.005233066777388254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,1024,0.016993065675099693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,5120,0.016937599579493205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,768,0.004713599880536398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,768,0.016315733393033348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,4096,0.01404906709988912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,3072,0.011292800307273865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,512,0.00415040006240209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,3584,0.012825600306193032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,512,0.016075733304023742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,256,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,256,0.015243732929229736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,128,0.003310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,128,0.014909866452217101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,1536,0.008164266745249431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,64,0.0031338666876157125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,2560,0.010174933075904845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,64,0.014958932995796204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1536,32,0.0032106667757034303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,2048,0.009507200121879578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1536,32,0.015191466609636942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,1024,0.007052800059318543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,65536,0.07688746452331544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,65536,0.13695786794026693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,16384,0.026397866010665894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,16384,0.04654293457667033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,12288,0.024217599630355836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,768,0.006503466765085857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,12288,0.035411198933919266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,10240,0.0212991992632548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,10240,0.03180906573931376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,8192,0.016618667046229045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,8192,0.0283786674340566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,256,0.005452799797058106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,512,0.0059114664793014525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,7168,0.0149536003669103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,7168,0.02687573234240214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1536,128,0.005161599814891815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,6144,0.013379200299580892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,6144,0.024567466974258424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,5120,0.011803733309110005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,12288,0.030374399820963544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,5120,0.022966400782267252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,65536,0.1591946601867676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,4096,0.010150399804115296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,4096,0.021015467246373494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,16384,0.039332266648610434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,3584,0.00950933297475179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,3584,0.02095466653505961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,10240,0.025898667176564534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,3072,0.008514133095741273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,3072,0.019257599115371705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,2560,0.007538133362929027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,2560,0.018513067563374837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,8192,0.022000000874201456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,2048,0.006410666803518932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,2048,0.018014933665593466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,7168,0.019720532496770225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,6144,0.017629865805308023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,1536,0.0056970665852228795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,1536,0.01700906753540039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,1024,0.0046528001626332605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,1024,0.016667733589808144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,4096,0.012914133071899415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,5120,0.015425067146619162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,3072,0.010460799932479859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,768,0.004193066557248434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,768,0.016156799594561257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,3584,0.011892267068227132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,512,0.003789866715669632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,512,0.015603199601173401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,256,0.0033471999069054925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,256,0.01516480048497518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,2560,0.00977066655953725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,128,0.0031178665657838186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,1536,0.007643733421961467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,128,0.014897066354751586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,64,0.0030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,64,0.0147189329067866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,1024,32,0.0031093334158261614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,1024,32,0.014877866705258688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,2048,0.008988799651463826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,65536,0.060318934917449954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,65536,0.12863360246022543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,16384,0.020537600914637247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,16384,0.04053119818369548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,12288,0.017333332697550455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,12288,0.03338453372319539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,1024,0.006692266464233399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,10240,0.016498133540153503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,10240,0.030401066939036055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,65536,0.14722347259521484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,16384,0.03821546634038289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,768,0.006233599781990051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,8192,0.014453333616256715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,512,0.0057322666049003605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,256,0.005308799942334493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,8192,0.027185066541035967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,1024,128,0.005125333368778229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,7168,0.013432533542315165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,7168,0.026039467255274458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,6144,0.02487679918607076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,12288,0.029623466730117797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,6144,0.012402133146921793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,5120,0.01141973336537679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,5120,0.022539732853571574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,5120,0.014980266491572062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,4096,0.009934932986895243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,4096,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,3584,0.009268266956011455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,3584,0.01996586720148722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,3072,0.007930666704972585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,3072,0.019846399625142418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,10240,0.025275733073552447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,2560,0.006897066533565521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,2560,0.019189333915710448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,2048,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,2048,0.017432532707850137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,2048,0.008771199981371562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,8192,0.02150933345158895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,1536,0.005390933156013489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,1536,0.017504000663757326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,7168,0.01912320057551066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,1024,0.004604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,6144,0.01664959987004598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,1024,0.016463999946912132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,768,0.004173866907755534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,768,0.01597546637058258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,512,0.003764266769091288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,4096,0.012584533294041952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,512,0.015158399939537048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,3584,0.01188693344593048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,256,0.0034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,256,0.015020799636840821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,3072,0.010213333368301391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,128,0.003068800022204717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,128,0.015159466862678527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,2560,0.009634133179982502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,64,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,64,0.014299733440081277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,768,32,0.002924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,768,32,0.014548266927401224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,1536,0.007683200140794118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,65536,0.042182401816050215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,1024,0.006491733094056447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,65536,0.12132906913757324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,65536,0.1262997309366862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,16384,0.014855466286341348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,768,0.006041599810123444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,16384,0.0371946652730306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,12288,0.01236799955368042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,12288,0.03134613235791524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,512,0.005621333420276642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,10240,0.013451733191808066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,10240,0.028612265984217327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,8192,0.011785599589347839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,8192,0.02690560022989909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,7168,0.011085866888364156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,7168,0.025546665986378985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,128,0.004825599988301595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,6144,0.010312533378601075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,6144,0.023098667462666832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,5120,0.009434666236241658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,5120,0.021576533714930214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,16384,0.03386346499125163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,4096,0.0086816002925237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,12288,0.026539733012517292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,4096,0.020581332842508952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,10240,0.022731733322143555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,4096,0.01143893301486969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,768,256,0.0052138666311899815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,3584,0.008014933268229166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,3584,0.020376533269882202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,3072,0.007275733351707459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,8192,0.019003732999165853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,3072,0.01916266679763794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,2560,0.0064629331231117245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,2560,0.018660267194112144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,7168,0.016694400707880655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,2048,0.005714133381843567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,2048,0.017504000663757326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,6144,0.015015467007954916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,1536,0.0050805335243542995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,1536,0.017180800437927246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,1024,0.004378666480382284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,1024,0.016563199957211814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,768,0.004008533308903376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,5120,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,768,0.016152532895406087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,512,0.003587199995915095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,512,0.015475199619928996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,3584,0.01106666624546051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,256,0.0032586666444937387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,3072,0.009805867075920105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,256,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,2560,0.009050666292508443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,128,0.0030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,128,0.014513066411018372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,64,0.002796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,2048,0.008184533317883809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,64,0.014174933234850565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,512,32,0.002919466545184453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,1536,0.007456000149250031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,512,32,0.014389333128929139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,65536,0.02983786662419637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,65536,0.1123967965443929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,16384,0.011591466267903645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,65536,0.12109013398488362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,16384,0.03222080071767171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,12288,0.01002239982287089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,768,0.0056981335083643595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,12288,0.027048534154891966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,10240,0.008915199836095174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,10240,0.026417066653569538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,512,0.0053493330876032506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,8192,0.008060800035794576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,8192,0.0243285338083903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,7168,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,7168,0.02290560007095337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,256,0.005086933573087057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,6144,0.007110400001207988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,128,0.004849066833655039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,6144,0.022267733017603555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,5120,0.007969066500663757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,512,1024,0.006355200211207073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,5120,0.021656533082326256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,4096,0.007201066613197327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,4096,0.01990506649017334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,16384,0.03245226740837097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,3584,0.006975999971230824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,12288,0.02539413372675578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,3584,0.019372800985972084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,10240,0.021911466121673585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,3072,0.006659199794133504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,3072,0.01840533415476481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,8192,0.018282665808995565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,2560,0.006270933151245117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,2560,0.017732266585032144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,7168,0.016377600034077962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,2048,0.005606399973233541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,6144,0.015013333161671957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,2048,0.017172267039616905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,1536,0.00486826648314794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,1536,0.016883200407028197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,4096,0.011403733491897583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,1024,0.004242133100827535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,1024,0.015982932845751443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,3584,0.010925867160161336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,768,0.0037770666182041167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,768,0.015409066279729208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,3072,0.009704533219337463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,768,0.005689600110054016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,512,0.00344106654326121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,512,0.015432533621788026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,256,0.003234133372704188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,256,0.014637866616249084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,128,0.002940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,2560,0.008950400352478027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,5120,0.0132533331712087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,128,0.014259200294812521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,2048,0.008210133512814839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,64,0.002792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,64,0.014412800470987955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,256,32,0.00275093341867129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,256,32,0.01423679987589518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,65536,0.023084799448649086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,1536,0.007347199817498524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,65536,0.10877973238627117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,16384,0.008411733309427898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,16384,0.029958399136861165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,12288,0.008334933718045553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,1024,0.006071466704209646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,12288,0.026370133956273394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,10240,0.007686399916807811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,10240,0.02523733377456665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,8192,0.0071050668756167095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,8192,0.02297280033429464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,512,0.005400533477465311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,7168,0.0067093332608540845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,256,0.004965333143870035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,7168,0.022503467400868733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,6144,0.006369066735108693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,256,128,0.0046293333172798155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,6144,0.02173973321914673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,5120,0.006725333134333293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,5120,0.02146453261375427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,16384,0.03285119930903117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,4096,0.006246399879455566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,12288,0.025438932577768962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,4096,0.019844265778859456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,10240,0.0218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,3584,0.006701866785685222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,3584,0.01941653291384379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,3584,0.010786133011182149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,3072,0.006414933502674103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,3072,0.01904319922129313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,2560,0.006202666461467743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,7168,0.016511999567349753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,2560,0.018202666441599527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,2048,0.005551999807357788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,2048,0.016768000523249307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,6144,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,1536,0.004802133142948151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,65536,0.12309226989746094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,1536,0.016757333278656007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,5120,0.0131221334139506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,1024,0.004124800115823746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,1024,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,768,0.003832533210515976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,768,0.015617066621780395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,4096,0.011271466811498005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,512,0.0033749334514141084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,8192,0.018410666783650716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,512,0.015202132860819497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,256,0.0030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,3072,0.009603200356165568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,256,0.014604799946149192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,256,0.004924799998601278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,128,0.0028607999285062153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,2560,0.008777599533398945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,128,0.014332800110181173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,64,0.0026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,64,0.014486400286356607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,128,32,0.0027935999135176343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,2048,0.008039466540018718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,128,32,0.01421440045038859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,65536,0.020600533485412596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,16384,0.0070783997575442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,65536,0.10651413599650066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,16384,0.030138667424519854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,12288,0.006393600006898243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,10240,0.006708266834417979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,12288,0.02632746696472168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,8192,0.006470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,10240,0.025146667162577314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,8192,0.0230730672677358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,1024,0.006159999966621399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,7168,0.006391466657320659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,1536,0.00745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,7168,0.024215465784072875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,6144,0.006074666480223338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,768,0.00580266664425532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,5120,0.021166932582855225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,6144,0.021901865800221763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,5120,0.006457599997520447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,4096,0.006145066519578298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,4096,0.02035413384437561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,3584,0.006678399940331777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,3584,0.01900586684544881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,3072,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,3072,0.018310399850209554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,128,0.004677333434422811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,2560,0.006086400151252747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,2560,0.017821866273880004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,2048,0.005397333204746247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,2048,0.01697280009587606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,1024,0.015689599514007568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,1536,0.004758400221665701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,1536,0.0165802667538325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,1024,0.0041002665956815084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,768,0.003684266656637192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,768,0.015609600146611533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,512,0.003399466723203659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,256,0.0029781334102153777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,512,0.014838400483131408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,384,128,512,0.005318399767080943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,256,0.014759467045466105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,128,0.0028618666032950084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,128,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,64,0.0026506667335828146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,64,0.014266666769981385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,64,32,0.0026687999566396077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,64,32,0.014152533809343972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,65536,0.018947199980417887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,12288,0.026332799593607587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,10240,0.006674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,16384,0.007845333218574524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,8192,0.022843732436498006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,65536,0.10576746463775635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,16384,0.029550933837890626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,12288,0.007079466680685679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,10240,0.02452053427696228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,8192,0.006566399832566579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,7168,0.006212266782919565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,7168,0.02292693257331848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,6144,0.006082133452097575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,6144,0.021463465690612794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,5120,0.0064298664530118305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,5120,0.02082560062408447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,4096,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,4096,0.01985599994659424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,3584,0.006466133395830791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,3584,0.018992000818252565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,3072,0.006085333228111267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,3072,0.018691200017929076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,2560,0.0060917332768440245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,2560,0.01750613252321879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,2048,0.005367466807365417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,2048,0.01728960076967875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,768,0.015453867117563882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,1536,0.004728533327579498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,1536,0.016461867094039916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,1024,0.004103466620047887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,1024,0.01616853376229604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,768,0.0037248000502586366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,512,0.0033589333295822145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,512,0.015255467096964518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,256,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,256,0.014477866888046264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,128,0.002898133297761281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,128,0.014801067113876343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,64,0.002794666588306427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,64,0.014173866311709086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,384,32,32,0.002643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,384,32,32,0.014410666624704995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,16384,0.4055477460225423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,16384,0.7545920054117838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,12288,0.31783679326375325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,12288,0.5647071838378906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,10240,0.46998294194539386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,10240,0.26511573791503906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,10240,0.3029952049255371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,8192,0.22037653923034667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,8192,0.3750101407368978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,7168,0.33068691889444984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,7168,0.19121707280476888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,6144,0.2898634592692057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,6144,0.17431893348693847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,5120,0.23995626767476402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,5120,0.1459338665008545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,4096,0.19366933504740397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,4096,0.12205759684244792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,4096,0.1345695972442627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,3584,0.17189119656880697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,3584,0.10738026301066081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,3584,0.11997439861297607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,16384,0.4984394709269206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,12288,0.3674965222676595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,3072,0.1490495999654134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,3072,0.10378026962280273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,2560,0.1255786657333374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,8192,0.2581855932871501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,2560,0.08551039695739746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,7168,0.21806507110595702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,2048,0.10245440006256104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,2048,0.07139200369517008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,6144,0.19258240063985188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,2048,0.07717333634694418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,1536,0.07923733393351237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,1536,0.05909333229064941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,5120,0.160044797261556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,1024,0.05475200017293295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,1024,0.04792533318201701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,1024,0.04787733157475789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,768,0.04591999848683675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,768,0.042328532536824545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,512,0.033394134044647215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,512,0.03682133356730143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,256,0.023538132508595787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,256,0.03143466711044311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,128,0.018288000424702962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,128,0.027654399474461872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,128,0.028161066770553588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,64,0.01930239995320638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,3072,0.10261013507843017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,64,0.02882986664772034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,65536,32,0.020049067338307698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,65536,32,0.031370667616526286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,1536,0.06285653511683145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,65536,0.44508692423502605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,65536,0.7545738855997721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,65536,0.5390303929646809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,16384,0.1858261267344157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,16384,0.12885653177897136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,768,0.043945598602294925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,12288,0.14336320559183757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,12288,0.09340373675028482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,512,0.03580480019251506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,10240,0.1182645320892334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,10240,0.08171412944793702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,8192,0.09349760214487711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,256,0.030819199482599896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,8192,0.06683093706766764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,8192,0.06565866470336915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,7168,0.08237120310465494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,7168,0.060946134726206455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,65536,2560,0.08870506286621094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,6144,0.05329493284225464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,6144,0.07151253223419189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,6144,0.05073493321736654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,5120,0.061305598417917884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,5120,0.047058133284250896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,4096,0.04919679959615071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,4096,0.035598933696746826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,3584,0.04343039989471435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,4096,0.04150293270746867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,3584,0.03805866638819377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,3072,0.03816746473312378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,3072,0.03519253333409627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,2560,0.03180586695671082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,16384,0.12865493297576905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,2560,0.03216639955838521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,2560,0.026588799556096394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,12288,0.09728319644927978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,2048,0.026155734062194826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,10240,0.08033280372619629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,2048,0.029140265782674153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,2048,0.022959999243418374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,1536,0.021035732825597127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,1536,0.025421865781148273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,1024,0.015634133418401083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,1024,0.023114667336146037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,7168,0.05885759989420573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,768,0.013123200337092081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,768,0.021706666549046835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,512,0.01060479978720347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,512,0.020125865936279297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,5120,0.043271466096242266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,256,0.007666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,256,0.018088533480962118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,128,0.005924266576766968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,3584,0.03303359945615132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,128,0.017336533466974894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,128,0.009822932879130046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,64,0.005310933291912079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,64,0.01730666756629944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,16384,32,0.005660800139109293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,16384,32,0.017164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,65536,0.5695349375406902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,1536,0.019232000907262167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,65536,0.35465494791666663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,1024,0.015822933117548624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,16384,0.14245972633361817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,16384,0.09774400393168131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,768,0.014683733383814493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,12288,0.10766826470692951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,12288,0.07631893157958984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,512,0.012506666779518127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,10240,0.09224747021993002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,10240,0.06569813489913941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,256,0.01040000021457672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,16384,3072,0.02955840031305949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,8192,0.07445120016733806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,10240,0.0694645325342814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,8192,0.05539733171463013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,8192,0.05748480161031087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,7168,0.06542506615320841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,7168,0.050179199377695716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,6144,0.056637867291768396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,6144,0.04515519936879476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,5120,0.04743359883626302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,5120,0.04013013442357381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,4096,0.03820586601893107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,4096,0.0355840007464091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,16384,0.10750400225321452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,65536,0.46041491826375325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,3584,0.033556266625722246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,12288,0.0807039976119995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,3584,0.03578879833221436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,3584,0.027824000517527266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,3072,0.029045333464940388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,3072,0.030406399567921953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,2560,0.025013333559036253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,2560,0.028112000226974486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,7168,0.04873600006103516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,2048,0.020732800165812172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,2048,0.025534933805465697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,1536,0.016422399878501893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,1536,0.023155200481414794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,6144,0.043082666397094724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,1024,0.012460800011952718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,1024,0.021288534005482994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,768,0.010718933741251628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,768,0.02002453406651815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,4096,0.030678399403889972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,512,0.008813866972923278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,512,0.017770665884017944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,3072,0.024758400519688924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,256,0.006413866579532623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,2560,0.022359466552734374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,256,0.01690773367881775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,2048,0.019058134158452353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,128,0.005273599922657013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,5120,0.03774079879124959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,128,0.016156799594561257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,1536,0.015664000312487283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,64,0.004523733258247375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,64,0.016588800152142844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,12288,32,0.004885333279768625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,1024,0.013565867145856222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,12288,32,0.016973867019017538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,16384,0.12377706368764241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,65536,0.47436478932698567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,65536,0.317630926767985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,16384,0.08730560143788656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,12288,0.091976531346639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,12288,0.06856853167215983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,512,0.009898666540781658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,10240,0.07889599800109863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,10240,0.05942399899164835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,8192,0.06278719902038574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,8192,0.05057280063629151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,768,0.011859200398127238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,256,0.008468266328175862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,7168,0.055326934655507407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,7168,0.04625920057296753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,12288,128,0.007925333579381307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,6144,0.047707732518514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,6144,0.04219413201014201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,5120,0.0397269328435262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,5120,0.03744639952977498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,12288,0.07308373451232911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,4096,0.03277973333994548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,4096,0.033854933579762776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,65536,0.3737600008646647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,16384,0.09597012996673585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,3584,0.031294933954874676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,3584,0.03128746747970581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,3072,0.02547733386357625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,3072,0.02881706754366557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,6144,0.038466131687164305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,2560,0.022105600436528525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,2560,0.026421332359313966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,8192,0.05109440088272095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,2048,0.01858560045560201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,2048,0.025166932741800947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,5120,0.03358613252639771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,1536,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,1536,0.022950400908788048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,10240,0.06250986655553183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,1024,0.011894399921099346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,1024,0.02101866602897644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,3072,0.022196267048517862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,768,0.010129066308339436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,768,0.01884053349494934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,7168,0.045102934042612716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,4096,0.027847466866175334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,512,0.008467200398445129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,512,0.01895893414815267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,3584,0.0253983994325002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,256,0.005746133128801982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,256,0.016900267203648886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,2560,0.020387200514475505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,128,0.004931200047334035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,128,0.01629973351955414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,2048,0.01762346625328064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,1536,0.014316800236701965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,64,0.004453333218892416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,64,0.01654293338457743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,10240,32,0.004664533336957296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,10240,32,0.01658986707528432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,65536,0.3726293245951334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,65536,0.26399893760681153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,16384,0.09803413550059001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,16384,0.0737237294514974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,12288,0.07442986965179443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,12288,0.05826773246129354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,12288,0.05103466510772705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,10240,0.06213440100351969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,10240,0.05068586667378744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,10240,0.04288640022277832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,8192,0.050076798597971595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,512,0.008896000186602275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,256,0.007597866654396057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,8192,0.04380373160044353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,128,0.007006933291753133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,7168,0.043160533905029295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,7168,0.03999360005060832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,6144,0.03743360042572021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,65536,0.26142613093058265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,6144,0.036722131570180255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,5120,0.032832000652949014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,768,0.010571733117103577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,5120,0.03280319968859355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,16384,0.06500800053278605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,4096,0.026434133450190227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,4096,0.02948480049769084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,3584,0.023478400707244874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,3584,0.027140265703201293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,3072,0.020603734254837035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,3072,0.025500800212224322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,3072,0.017808000246683754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,2560,0.017757866779963175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,2560,0.024140799045562746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,2560,0.015849600235621132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,2048,0.014682666460673014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,8192,0.035318398475646974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,2048,0.02233920097351074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,7168,0.032202666997909545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,1536,0.011948800086975098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,1536,0.02105600039164225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,6144,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,1024,0.00944213370482127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,1024,0.019099734226862588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,5120,0.024576000372568765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,768,0.00819946676492691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,4096,0.020987733205159505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,768,0.018793600797653198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,512,0.006528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,512,0.017249067624409996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,3584,0.01962560017903646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,256,0.005081599950790406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,256,0.01604159971078237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,128,0.0044608001907666525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,128,0.015518933534622192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,2048,0.014078933000564575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,64,0.004101333270470301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,10240,1024,0.012495999534924824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,1536,0.012187733252843221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,64,0.015878400206565856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,8192,32,0.004507733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,8192,32,0.01623466710249583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,1024,0.010403199990590414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,768,0.008968533078829447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,16384,0.08675519625345865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,65536,0.2334826628367106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,65536,0.3440575917561849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,512,0.007900799810886382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,16384,0.06666026512781778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,256,0.007439999779065449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,65536,0.25863146781921387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,12288,0.06657706499099732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,12288,0.056321068604787194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,10240,0.05384960174560547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,10240,0.047033600012461346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,8192,0.040839465459187825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,8192,128,0.006862933437029521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,8192,0.04499520063400268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,7168,0.03741333484649658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,7168,0.03843733469645182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,6144,0.033931732177734375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,6144,0.034075733025868735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,6144,0.026943999528884887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,5120,0.028732800483703615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,5120,0.030538666248321533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,4096,0.024218666553497314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,4096,0.028068266312281293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,16384,0.06643626689910889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,3584,0.020891733964284263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,12288,0.04916266600290935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,3584,0.025778132677078246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,3584,0.01871573328971863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,3072,0.01831573247909546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,10240,0.04264959891637166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,3072,0.024553600947062174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,2560,0.015826132893562318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,2560,0.023011199633280435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,8192,0.03518079916636149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,2048,0.012929067015647888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,7168,0.031447466214497885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,2048,0.021499733130137123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,1536,0.010975999633471172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,1536,0.020171733697255452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,1024,0.008601599931716919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,5120,0.02404159903526306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,1024,0.017917867501576742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,768,0.007704533139864604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,768,0.017016534010569254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,768,0.007818666597207386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,512,0.005870933334032694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,512,0.016389333208402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,256,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,256,0.016190933187802632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,3072,0.016661333044370015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,256,0.006291200220584869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,128,0.00405973345041275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,2560,0.015080533425013223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,128,0.015508266290028891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,2048,0.013319466511408487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,64,0.0038005332152048744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,64,0.015556266903877259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,7168,32,0.0040554667512575785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,7168,32,0.01577600042025248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,1536,0.011177600423494975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,4096,0.020329600572586058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,65536,0.293283208211263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,65536,0.21680639584859213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,1024,0.009439999858538311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,16384,0.08043839931488037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,16384,0.06324906746546427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,12288,0.05649706522623697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,12288,0.05034026702245077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,10240,0.04862720171610514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,10240,0.044278399149576826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,10240,0.03684693177541097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,8192,0.03922666708628337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,8192,0.03917760054270426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,512,0.007000533243020375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,7168,0.034482133388519284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,7168,0.03565973440806071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,6144,0.030044800043106078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,6144,0.03172053297360738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,5120,0.024936532974243163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,7168,128,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,5120,0.028963200251261395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,4096,0.02148373325665792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,4096,0.025976532697677614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,65536,0.22519572575887045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,16384,0.05847893158594767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,3584,0.019553067286809285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,12288,0.044113067785898845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,3584,0.025865600506464644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,3072,0.016455466548601784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,3072,0.023869866132736207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,2560,0.013896532853444419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,8192,0.0309663991133372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,2560,0.022111999988555908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,2048,0.011803733309110005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,7168,0.027483733495076497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,2048,0.02112213373184204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,6144,0.024013866980870567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,1536,0.00976746678352356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,1536,0.01939199964205424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,1536,0.01062506635983785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,1024,0.007950933277606964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,5120,0.021585067113240562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,1024,0.017560533682505288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,768,0.0070154666900634766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,768,0.017114667097727458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,4096,0.01803626616795858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,512,0.005550933380921682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,512,0.01660053332646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,256,0.004558933277924856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,256,0.015662933389345803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,3584,0.016758400201797485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,128,0.004058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,3072,0.015581867098808289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,128,0.015312000115712484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,64,0.0038090666135152185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,2560,0.014274133245150247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,64,0.015347199638684592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,6144,32,0.003987200061480204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,6144,32,0.01569919983545939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,2048,0.01246399978796641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,65536,0.24254612922668456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,65536,0.1981002648671468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,65536,0.19918826421101887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,1024,0.008534399668375652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,16384,0.07290986378987631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,16384,0.062317868073781334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,16384,0.05283840099970499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,12288,0.05390826861063639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,768,0.0075989335775375364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,12288,0.046625065803527835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,10240,0.043416531880696614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,10240,0.041006934642791745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,512,0.006818133095900218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,8192,0.037625598907470706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,8192,0.037880531946818036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,256,0.0063360000650088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,8192,0.028277333577473956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,7168,0.03132800062497457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,7168,0.03401493231455485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,6144,0.02611306707064311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,6144,0.029655466477076214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,5120,0.023217066129048666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,6144,128,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,5120,0.02812160054842631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,4096,0.020822399854660036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,4096,0.025043199459711712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,3584,0.018150399128595986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,3584,0.025305600961049397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,12288,0.04019519885381063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,3072,0.015312000115712484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,3072,0.022834134101867676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,10240,0.03353386720021566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,2560,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,2560,0.021081600586573282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,7168,0.025194666783014935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,2048,0.010857599973678588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,2048,0.020347734292348228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,1536,0.009240532914797466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,6144,0.02207466761271159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,1536,0.019241599241892497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,5120,0.019577600558598838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,1024,0.007179733117421467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,1024,0.017361066738764443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,4096,0.016739199558893837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,768,0.006260266900062561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,768,0.016874667008717856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,512,0.005167999863624572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,512,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,3584,0.015307733416557312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,256,0.004324266811211904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,256,0.015824000040690102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,3072,0.014262400070826211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,128,0.003978666663169861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,2560,0.012965333461761475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,128,0.01548373301823934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,64,0.003570133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,2048,0.011457066734631855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,64,0.015307733416557312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,5120,32,0.003774933268626531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,5120,32,0.015278933445612588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,1536,0.009668266773223877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,65536,0.19107413291931152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,1024,0.0077344000339508055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,65536,0.16960959434509276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,16384,0.05461013317108154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,768,0.007130666573842366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,16384,0.054604800542195644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,12288,0.04208213488260905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,12288,0.04410880009333293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,10240,0.03502080043156942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,512,0.006345599889755249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,10240,0.039188265800476074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,8192,0.02948266665140788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,8192,0.034644265969594315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,256,0.005784533421198527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,7168,0.027669332424799603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,7168,0.030821333328882854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,6144,0.022215465704600014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,5120,128,0.005478399991989136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,6144,0.028462932507197065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,5120,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,5120,0.025906133651733398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,65536,0.20352320671081542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,16384,0.04357866843541463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,4096,0.01690346598625183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,12288,0.03253226677576701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,4096,0.02493120034535726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,3584,0.01430506706237793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,3584,0.023466666539510093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,10240,0.028112000226974486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,3072,0.013355732957522074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,8192,0.024897066752115886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,3072,0.02150613268216451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,2560,0.011771733562151592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,2560,0.020462934176127115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,7168,0.021564799547195434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,2048,0.009603200356165568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,2048,0.019663999478022255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,1536,0.00831573357184728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,1536,0.018184532721837364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,6144,0.018987733125686645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,5120,0.017083734273910522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,1024,0.006446933249632518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,1024,0.016966400543848674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,768,0.0053258667389551794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,768,0.016633599996566772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,4096,0.01460693379243215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,512,0.004643199841181437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,512,0.015947733322779337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,3072,0.013296000162760415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,256,0.004093866546948751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,256,0.015692800283432007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,2048,0.010683733224868774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,128,0.0035455999275048576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,3584,0.013985066612561544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,128,0.014883200327555338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,64,0.0033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,64,0.015096533298492431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,2560,0.011949867010116577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,4096,32,0.0035573333501815797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,1024,0.007355733215808869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,4096,32,0.015074132879575094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,65536,0.17113919258117677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,65536,0.15563839276631672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,16384,0.04861546754837036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,16384,0.051641599337259925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,1536,0.009081600109736125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,12288,0.03816106716791789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,12288,0.04152533213297526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,10240,0.03341653347015381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,10240,0.03674986759821574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,768,0.00694400022427241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,8192,0.02693013350168864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,8192,0.031420799096425374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,512,0.006548266609509785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,256,0.006050133208433787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,4096,128,0.005809066692988077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,7168,0.023928532997767128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,7168,0.028911999861399335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,6144,0.02097813288370768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,6144,0.026814933617909747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,5120,0.017800533771514894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,5120,0.025356799364089966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,12288,0.0334666649500529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,65536,0.17173226674397785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,4096,0.014631467064221701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,4096,0.022950400908788048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,16384,0.043567999203999834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,3584,0.012910933295885722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,3584,0.02225173314412435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,3072,0.011749333143234253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,3072,0.0207370658715566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,10240,0.029100799560546876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,2560,0.010284800330797832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,2560,0.019831466674804687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,6144,0.01885653336842855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,7168,0.02212266723314921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,2048,0.008844799796740214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,8192,0.02483733296394348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,2048,0.018206934134165444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,1536,0.0077344000339508055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,1536,0.017544533809026083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,1024,0.005750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,1024,0.0165802667538325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,5120,0.01770026683807373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,768,0.0051242664456367494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,768,0.016471466422080992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,4096,0.014691199858983359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,3072,0.012644267082214356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,512,0.004401066899299621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,512,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,256,0.0038261334101359046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,3584,0.013874133427937826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,256,0.015190399686495461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,128,0.00348693331082662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,128,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,2560,0.011781332890192668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,64,0.0032640000184377036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,64,0.01490133305390676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3584,32,0.003239466746648153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3584,32,0.015222400426864624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,1536,0.008368000388145447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,65536,0.1492512067159017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,65536,0.14231039683024088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,16384,0.04360853433609009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,16384,0.049960533777872726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,2048,0.010381866494814556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,1024,0.007281066477298736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,12288,0.03424213329950969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,12288,0.03943039973576863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,10240,0.029785599311192828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,10240,0.034705066680908205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,768,0.006622933348019918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,8192,0.026691200335820515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,8192,0.031204267342885332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,256,0.005667200187842051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,512,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,7168,0.022334933280944824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3584,128,0.005288533369700114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,7168,0.028163200616836546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,6144,0.018922666708628334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,6144,0.025308799743652344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,65536,0.1632266680399577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,12288,0.032425600290298465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,5120,0.01621333360671997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,16384,0.04141120115915935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,5120,0.024153600136439003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,4096,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,4096,0.02323946754137675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,3584,0.012449066837628682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,3584,0.021721599499384563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,3072,0.010781866312026978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,10240,0.027293866872787474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,3072,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,2560,0.00973653296629588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,2560,0.019320533672968546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,8192,0.023412267367045082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,2048,0.008666666348775227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,2048,0.018692266941070557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,6144,0.018398932615915933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,1536,0.007136000196139018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,1536,0.016934400796890257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,7168,0.020787199338277183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,1024,0.0053962667783101406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,1024,0.01639359990755717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,3072,0.011900800466537475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,4096,0.013833600282669067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,768,0.004748799900213877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,768,0.016301866372426352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,5120,0.01637333333492279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,512,0.004218666752179464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,512,0.01565439999103546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,256,0.0036917333801587426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,3584,0.013086932897567748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,256,0.015253333250681558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,2560,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,128,0.00329066663980484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,128,0.014811733365058899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,64,0.0031317333380381264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,1536,0.0076682666937510175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,64,0.014844800035158793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,3072,32,0.003386666625738144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,3072,32,0.015079466501871744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,65536,0.13022826512654623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,2048,0.009742933511734008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,65536,0.1320181369781494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,16384,0.041289599736531575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,1024,0.006844800213972728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,16384,0.04649920066197713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,12288,0.030794666210810347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,12288,0.03841919898986816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,10240,0.026793599128723145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,10240,0.03186879952748616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,768,0.006292266647020976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,512,0.005810133119424184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,8192,0.02499306599299113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,256,0.005321600039800008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,8192,0.02809600035349528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,3072,128,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,7168,0.018532266219456993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,7168,0.026370133956273394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,6144,0.016127999623616537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,6144,0.024643200635910033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,5120,0.01390506625175476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,5120,0.023125332593917847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,12288,0.03113600015640259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,65536,0.1709333260854085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,4096,0.0118559996287028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,16384,0.04078186750411987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,4096,0.022408533096313476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,3584,0.011020800471305848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,3584,0.021284266312917074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,3072,0.00992746651172638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,3072,0.019820799430211387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,10240,0.0262880007425944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,2560,0.009051733215649923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,2560,0.018209065993626913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,6144,0.017358932892481485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,8192,0.02249493400255839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,7168,0.02027413249015808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,2048,0.007796266674995422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,2048,0.017898666858673095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,1536,0.0063701331615448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,1536,0.017299199104309083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,1024,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,1024,0.016275200247764587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,5120,0.015990400314331056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,768,0.004508799811204275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,768,0.015982932845751443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,4096,0.013520000378290811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,3072,0.01165120005607605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,512,0.003992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,512,0.015379200379053751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,256,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,3584,0.012896000345547994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,256,0.014851199587186179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,128,0.0032106667757034303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,128,0.01458346645037333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,2560,0.010311466455459595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,64,0.0031317333380381264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,64,0.01458560029665629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,1536,0.00767573316891988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2560,32,0.0031914666295051576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2560,32,0.01470080018043518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,2048,0.009160533547401428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,65536,0.10279573599497478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,1024,0.006909866631031036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,65536,0.11749013264973958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,768,0.006359466910362243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,16384,0.030961066484451294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,16384,0.04365013440450032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,12288,0.025974400838216144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,12288,0.03224106629689534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,10240,0.023038933674494423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,10240,0.02908373276392619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,512,0.005706666906674703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,256,0.0053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,8192,0.023099732398986817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,8192,0.028497066100438433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,7168,0.01529706617196401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,7168,0.02536746660868327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2560,128,0.005117866893609365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,6144,0.015505066514015198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,6144,0.02286720077196757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,5120,0.011787733435630799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,5120,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,16384,0.03396906852722168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,65536,0.15510187149047852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,4096,0.010621866583824158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,12288,0.02683093349138896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,4096,0.019827200969060262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,3584,0.009529599547386169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,3584,0.019272534052530925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,3072,0.008718933661778767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,3072,0.018691200017929076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,10240,0.022858667373657226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,2560,0.007852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,2560,0.018057600657145182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,8192,0.019469867149988808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,2048,0.0069365332523981735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,7168,0.01791999936103821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,2048,0.017560533682505288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,1536,0.005870933334032694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,6144,0.015904000401496886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,1536,0.01688106656074524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,1024,0.004758400221665701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,1024,0.01613759994506836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,5120,0.014076800147692362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,768,0.004439466694990794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,768,0.015895467003186545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,4096,0.012756266196568809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,512,0.004069333275159201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,512,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,3072,0.010643200079600016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,3584,0.011934933066368104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,256,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,256,0.014993066589037577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,128,0.0032117334504922234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,128,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,2048,0.0082997332016627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,64,0.0030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,64,0.014620799819628397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,2560,0.00969599982102712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,2048,32,0.0032799998919169106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,1024,0.006379733482996623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,2048,32,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,65536,0.07981866995493571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,768,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,65536,0.10557759602864583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,16384,0.025244800249735515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,1536,0.007514666517575581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,16384,0.04024426539738973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,12288,0.02032426595687866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,12288,0.032663466533025105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,10240,0.019799466927846274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,10240,0.029126399755477907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,8192,0.018556799491246542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,8192,0.026307199398676557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,512,0.005674666663010915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,7168,0.015066666404406228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,256,0.005283200244108836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,7168,0.02521386742591858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,2048,128,0.00508480022350947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,6144,0.013462400436401368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,6144,0.02325119972229004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,5120,0.011808000008265178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,5120,0.022025599082310995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,12288,0.027896533409754436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,65536,0.13733867009480794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,4096,0.010205866893132527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,16384,0.037043201923370364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,4096,0.02078826626141866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,3584,0.009316266576449076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,3584,0.01985493302345276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,3072,0.008684800068537394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,3072,0.019038933515548705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,8192,0.02065066695213318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,2560,0.007812266548474629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,2560,0.018599466482798258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,6144,0.01630506714185079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,2048,0.006715733309586842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,7168,0.01872106591860453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,2048,0.017308799425760905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,10240,0.024010666211446128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,1536,0.00565119981765747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,1536,0.016455466548601784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,5120,0.014674133062362671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,1024,0.004769066472848257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,1024,0.016455466548601784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,768,0.0042912001411120095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,4096,0.012106666962305706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,768,0.01608746647834778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,3072,0.009986133376757304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,512,0.003828266759713491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,3584,0.011374933520952861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,512,0.015365333358446757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,256,0.003307733436425527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,256,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,128,0.003155199935038885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,128,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,2560,0.00962559978167216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,64,0.002926933268706004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,64,0.014673067132631936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1536,32,0.0030122667551040648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1536,32,0.014863999684651694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,1536,0.007281066477298736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,65536,0.0544106682141622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,2048,0.008454400300979614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,65536,0.09428479671478271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,16384,0.01998186707496643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,16384,0.03425706624984741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,12288,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,12288,0.0279423991839091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,1024,0.006558933357397716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,10240,0.014989866813023885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,10240,0.02686186631520589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,768,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,512,0.0056202664971351625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,256,0.005143466591835022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,8192,0.012604799866676331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,8192,0.024550400177637734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,7168,0.01162453293800354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1536,128,0.005017599960168203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,7168,0.02416213353474935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,6144,0.010732799768447876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,6144,0.02228053410847982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,5120,0.009973333279291789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,5120,0.02142826716105143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,65536,0.13632213274637858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,16384,0.030962133407592775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,4096,0.010710400342941285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,4096,0.02026559909184774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,3584,0.00853013296922048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,3584,0.01922773321469625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,12288,0.02448106606801351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,3072,0.007956266899903615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,3072,0.018565332889556883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,10240,0.02099626660346985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,2560,0.007356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,2560,0.01875413258870443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,8192,0.017992534240086875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,2048,0.0063701331615448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,7168,0.016471466422080992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,2048,0.01735573410987854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,6144,0.0141567995150884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,1536,0.00553599993387858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,1536,0.01660160024960836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,1024,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,5120,0.012489599982897441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,1024,0.01583999991416931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,768,0.004281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,768,0.015777066349983215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,4096,0.010713600118954976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,512,0.0038015998899936674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,512,0.015086932977040609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,3072,0.00944213370482127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,3584,0.01046399970849355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,256,0.003303466737270355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,256,0.0146506667137146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,128,0.003017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,128,0.014330666263898215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,2560,0.008988799651463826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,64,0.002883200099070867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,2048,0.007898666461308797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,64,0.014732799927393594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,1024,32,0.002890666574239731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,1024,32,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,65536,0.043245867888132734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,1024,0.006297599772612255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,65536,0.08880533377329508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,1536,0.0071839998165766404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,16384,0.015842133760452272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,768,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,16384,0.0309770663579305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,12288,0.012728533148765564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,12288,0.02773653268814087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,10240,0.011636267105738323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,10240,0.025312000513076784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,8192,0.011881599823633831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,8192,0.025303467114766436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,512,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,256,0.005089066425959269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,7168,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,7168,0.02318933407465617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,1024,128,0.004844800134499868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,6144,0.010441600282986959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,6144,0.02131839990615845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,65536,0.11299946308135986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,16384,0.030584534009297688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,5120,0.009656533598899841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,12288,0.024074665705362954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,5120,0.020218666394551596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,4096,0.00913706620534261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,4096,0.019257599115371705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,10240,0.02064639925956726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,3584,0.008289066453774769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,3584,0.01912320057551066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,3072,0.007225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,3072,0.018323200941085815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,8192,0.017221333583196004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,2560,0.0064533332983652755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,2560,0.017349332571029663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,2048,0.005737600227197012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,2048,0.017240534226099648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,7168,0.015675733486811318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,1536,0.0050901333491007485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,1536,0.01641279955705007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,6144,0.013680000106493631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,1024,0.004314666489760081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,1024,0.015637333194414772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,5120,0.012058666348457337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,768,0.004090666770935059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,4096,0.010538666447003683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,768,0.01575040022532145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,512,0.0036245333651701607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,3072,0.00944106678167979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,512,0.015126400192578635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,256,0.0032448001205921174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,256,0.014779733618100485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,3584,0.010305066903432209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,128,0.0030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,2048,0.007725866635640462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,2560,0.008981333176294962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,128,0.014551466703414917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,64,0.0028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,64,0.014193066954612732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,768,32,0.002898133297761281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,768,32,0.014377599954605103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,65536,0.03664213418960571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,65536,0.0853109359741211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,16384,0.012459733088811238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,16384,0.028203733762105304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,1536,0.007011199990908305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,1024,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,12288,0.010265599687894185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,12288,0.024631466468175253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,10240,0.009483733773231506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,10240,0.02386773427327474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,768,0.005628799895445505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,512,0.005338666836420695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,8192,0.009718400239944459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,65536,0.10794239838918049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,8192,0.02184213399887085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,7168,0.00936853289604187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,256,0.005016533533732096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,7168,0.020832000176111858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,6144,0.008577066659927367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,768,128,0.004711466530958811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,6144,0.020294400056203206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,16384,0.030560000737508135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,5120,0.007896533111731212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,5120,0.02037013371785482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,4096,0.007039999961853028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,4096,0.019324799378712974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,3584,0.006964266796906789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,3584,0.019241599241892497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,12288,0.023383466402689616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,3072,0.00658133327960968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,3072,0.018296533823013307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,10240,0.020172800620396933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,2560,0.006523733337720235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,2560,0.01753066579500834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,8192,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,2048,0.005735466877619425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,7168,0.015597866972287497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,2048,0.017032533884048462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,1536,0.005043200155099233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,6144,0.013470932841300964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,1536,0.016379732886950174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,5120,0.012055466572443645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,1024,0.004248533149560293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,1024,0.015875200430552162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,4096,0.010363733768463135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,768,0.003835733234882355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,768,0.015530666708946228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,768,0.00558186670144399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,3584,0.010173867146174114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,512,0.0034986667335033415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,512,0.015158399939537048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,256,0.0031093334158261614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,3072,0.00925439993540446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,256,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,128,0.002959999938805898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,128,0.014631467064221701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,2560,0.008916266759236654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,64,0.0028480000793933867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,64,0.014478933811187745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,512,32,0.002809600035349528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,2048,0.0078015998005867004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,512,32,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,65536,0.02461120088895162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,65536,0.07773760159810385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,1536,0.007014399766921997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,16384,0.010003200173377991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,1024,0.00613013356924057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,16384,0.026969599723815917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,12288,0.008920533458391826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,12288,0.02375040054321289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,12288,0.02284053365389506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,10240,0.00795839975277583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,10240,0.022913066546122234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,8192,0.008162133395671844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,512,0.0052255998055140175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,8192,0.02187946637471517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,7168,0.0077674667040507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,256,0.004911999901135763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,7168,0.021946666638056438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,7168,0.015365333358446757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,6144,0.007267199953397115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,6144,0.02032426595687866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,512,128,0.004711466530958811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,5120,0.00687360018491745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,5120,0.020246400435765585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,5120,0.012125866611798604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,4096,0.0063285330931345625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,4096,0.019051732619603474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,3584,0.0067071999112765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,3584,0.01863893270492554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,3584,0.010148266951243084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,3072,0.00651093324025472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,3072,0.017892267306645712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,65536,0.10631786982218425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,2560,0.0063178668419520065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,16384,0.028961066404978437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,2560,0.01763520042101542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,2560,0.00865066647529602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,2048,0.005560533205668131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,2048,0.016923733552296958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,1536,0.004852266609668731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,10240,0.019617066780726115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,1536,0.01628159979979197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,8192,0.01639786660671234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,1024,0.00420906643072764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,1024,0.016114133596420287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,768,0.003915733347336451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,768,0.015556266903877259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,512,0.0035274667044480645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,6144,0.013428266843159994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,512,0.015056000153223673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,256,0.003268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,256,0.014808533589045205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,4096,0.010452266534169514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,128,0.002940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,128,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,3072,0.009251200159390767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,128,0.004670933385690053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,64,0.0027935999135176343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,64,0.014393599828084311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,256,32,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,256,32,0.014726400375366211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,65536,0.017434666554133095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,2048,0.007686399916807811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,65536,0.0732586701711019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,16384,0.008539733290672303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,1536,0.006980266670385997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,1024,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,16384,0.025778132677078246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,12288,0.007482666770617168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,12288,0.02339306672414144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,10240,0.00697920024394989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,768,0.005487999816735586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,10240,0.022217599550882976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,10240,0.019768534104029338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,512,0.005256533126036326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,8192,0.0065738668044408154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,8192,0.021095466613769532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,7168,0.0064416001240412395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,7168,0.020742400487263998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,256,256,0.004857600231965383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,7168,0.015385599931081137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,6144,0.00621013343334198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,6144,0.020078933238983153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,5120,0.006579199930032094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,5120,0.020390399297078452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,5120,0.012013866504033407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,4096,0.00621013343334198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,4096,0.019186133146286012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,4096,0.010308266679445902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,3584,0.00652266691128413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,3584,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,3072,0.006303999821345012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,65536,0.10700480143229167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,3072,0.017798399925231932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,3072,0.00904319981733958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,16384,0.02918826738993327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,2560,0.0061471998691558834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,2560,0.017256534099578856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,2048,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,2048,0.016990933815638223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,12288,0.022915200392405192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,2048,0.007715199887752533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,1536,0.0047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,1536,0.01623146633307139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,1024,0.00418453315893809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,1024,0.015994667013486227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,8192,0.016619732975959776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,1024,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,768,0.0037258667250474296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,768,0.015269333124160766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,512,0.00342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,6144,0.013402666648228964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,512,0.01514346698919932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,256,0.0030591999491055804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,256,0.014682666460673014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,128,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,128,0.01430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,3584,0.010018133123715718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,128,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,64,0.0026410666604836782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,64,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,128,32,0.002746666719516118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,128,32,0.014342400431632995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,2560,0.00858133335908254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,65536,0.01112320025761922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,16384,0.006743466854095459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,65536,0.07092693646748861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,16384,0.025494400660196943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,12288,0.006348800162474315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,10240,0.0064640000462532045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,10240,0.0221781333287557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,12288,0.023384533325831094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,1536,0.00694400022427241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,8192,0.006450133522351582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,8192,0.021284266312917074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,7168,0.006202666461467743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,7168,0.020631466309229532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,6144,0.006141866743564606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,5120,0.01997013290723165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,768,0.005533866584300995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,6144,0.020372267564137778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,5120,0.006526933113733928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,512,0.005197866757710775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,4096,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,4096,0.0195850670337677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,256,128,256,0.00487253318230311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,3584,0.006458666423956554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,3584,0.01821440060933431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,3072,0.0061941335598627726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,3072,0.017478400468826295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,1536,0.016103466351826988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,2560,0.006006399790445963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,2560,0.01748266617457072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,2048,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,768,0.01555519998073578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,2048,0.01693120002746582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,1536,0.004830933113892873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,1024,0.00412266676624616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,256,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,1024,0.015398400028546652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,768,0.0037109332780043284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,512,0.0033290666838486993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,256,0.0029706666866938275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,512,0.015059199929237366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,32,0.014266666769981385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,128,0.0028010666370391845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,64,0.0026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,128,0.014446933070818582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,64,64,0.014173866311709086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,64,32,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,65536,0.01162559986114502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,65536,0.06991360187530518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,16384,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,16384,0.025919999678929644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,12288,0.0063296000162760425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,12288,0.023294933636983237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,10240,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,10240,0.022116265694300332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,8192,0.006312533219655354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,8192,0.021512534221013388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,7168,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,7168,0.020424532890319824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,6144,0.005982933441797892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,6144,0.019870932896931967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,5120,0.006417066852251689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,5120,0.0200437327226003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,4096,0.006005333364009857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,4096,0.020205867290496827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,3584,0.006571733454863231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,3584,0.01877760092417399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,3072,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,3072,0.01768746574719747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,2560,0.006064000229040781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,2560,0.01713599960009257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,2048,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,2048,0.017240534226099648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,1536,0.004727466901143392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,1536,0.016295466820398966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,1024,0.004003199934959412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,1024,0.015475199619928996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,768,0.003718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,768,0.015188266833623251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,512,0.00348693331082662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,512,0.01511360009511312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,256,0.0030602666238943735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,256,0.014804266889890037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,128,0.0027744000156720476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,128,0.014451199769973755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,64,0.0027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,64,0.01437440017859141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,256,32,32,0.002644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,256,32,32,0.01395626664161682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,16384,0.6168383916219076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,16384,0.3494762738545736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,12288,0.46701866785685225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,12288,0.25840853055318197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,16384,0.5760351816813152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,10240,0.3902069409688314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,12288,0.446776549021403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,10240,0.23325546582539877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,8192,0.18123413721720377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,8192,0.312935479482015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,7168,0.27611627578735354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,8192,0.32004480361938475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,7168,0.16514453887939454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,6144,0.2355957349141439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,6144,0.14868906339009602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,5120,0.2009258588155111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,6144,0.24301973978678384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,5120,0.12405973275502521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,5120,0.18896106084187825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,4096,0.16288107236226398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,4096,0.10440746943155925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,3584,0.14353386561075848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,3584,0.09513920148213705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,3584,0.1422111988067627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,3072,0.12643520037333172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,3072,0.08453013102213541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,2560,0.10689493020375569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,2560,0.07328106562296549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,2048,0.08818986415863037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,2048,0.06239039897918701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,2048,0.08574720223744711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,1536,0.06870720386505128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,1536,0.0527786652247111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,1024,0.04835306803385417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,1024,0.04301439921061198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,10240,0.3788842519124349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,768,0.03913493156433105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,768,0.040514131387074784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,7168,0.2865578651428223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,512,0.029844266176223756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,512,0.034227200349171955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,256,0.02099840044975281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,256,0.027511467536290485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,4096,0.16499627431233724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,128,0.01537493367989858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,128,0.02569920023282369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,3072,0.1222879966100057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,128,0.027346134185791016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,2560,0.10583893458048503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,64,0.012106666962305706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,64,0.02545173366864522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,65536,32,0.013646933436393737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,65536,32,0.026391466458638508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,65536,0.36082773208618163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,768,0.042865065733591716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,65536,0.6261962890625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,16384,0.1569760004679362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,512,0.035010135173797606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,16384,0.10366186300913494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,12288,0.11833386421203614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,12288,0.08237439791361491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,10240,0.09967253208160401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,10240,0.07075093587239584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,256,0.030109866460164385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,8192,0.0803605318069458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,8192,0.05923839807510376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,8192,0.08584427038828532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,7168,0.07080000241597494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,1536,0.06353600025177002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,7168,0.05383253494898478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,6144,0.06094079812367757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,6144,0.04868799845377604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,65536,1024,0.04665493170420329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,6144,0.06577173471450806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,5120,0.05238613287607828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,5120,0.04378133217493693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,4096,0.042610132694244386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,4096,0.037862400213877365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,4096,0.04506560166676839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,3584,0.03801279862721761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,3584,0.0357151985168457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,65536,0.5987328211466472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,3072,0.03773866494496663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,16384,0.16133333841959635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,12288,0.12281173070271809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,3072,0.03347520033518474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,3072,0.035867734750111895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,2560,0.028755199909210206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,10240,0.10270720322926838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,2560,0.03023253281911214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,2048,0.02373440066973368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,2048,0.026340266068776447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,1536,0.019309866428375243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,1536,0.023572266101837158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,1536,0.020946133136749267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,1024,0.013660800457000733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,7168,0.07420480251312256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,1024,0.021237333615620933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,768,0.011169067025184632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,768,0.020332799355189005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,512,0.008955732981363932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,512,0.018917334079742432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,512,0.012650666634241739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,256,0.006739200154940288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,3584,0.0412778655687968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,256,0.016541866461435954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,128,0.0050005331635475155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,128,0.016110933820406594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,64,0.0045952002207438145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,64,0.01632213294506073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,2560,0.031038933992385866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,16384,32,0.004913066824277243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,16384,32,0.016720000902811685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,2048,0.026051199436187743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,5120,0.0529151995976766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,65536,0.4598933219909668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,65536,0.2899807929992676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,1024,0.016327466567357382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,16384,0.12626240253448487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,768,0.014705066879590353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,16384,0.10698453585306804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,12288,0.10162453651428223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,12288,0.08140160242716471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,10240,0.08815893332163492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,10240,0.07005226612091064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,8192,0.07791253725687662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,8192,0.05873493353525797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,256,0.010396800438563029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,7168,0.067958402633667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,16384,128,0.009543466567993163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,7168,0.05410559972127279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,6144,0.04977173407872518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,6144,0.04768426815668742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,5120,0.04151360193888347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,5120,0.03710613250732422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,65536,0.5288277308146159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,16384,0.1417088031768799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,4096,0.03811519940694173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,12288,0.10953813393910725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,4096,0.032790400584538776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,3584,0.030690133571624756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,3584,0.030742400884628297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,3072,0.026263467470804852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,8192,0.08106773694356283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,3072,0.028216532866160077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,7168,0.06995946566263835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,2560,0.022323199113210044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,2560,0.02600533366203308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,6144,0.06019413471221924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,2560,0.028200532992680865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,2048,0.018462934096654258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,2048,0.023497599363327026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,1536,0.01474346617857615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,1536,0.022091732422510783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,5120,0.05008533398310343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,1024,0.010730666915575664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,1024,0.01990933418273926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,768,0.009123200178146362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,768,0.01906026601791382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,10240,0.09268266359965006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,4096,0.042165335019429526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,512,0.007682133217652638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,512,0.01715946594874064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,256,0.005415466427803039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,3584,0.037356801827748615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,256,0.016570666432380678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,128,0.004549333453178405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,3072,0.032833067576090495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,128,0.01609173317750295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,64,0.004062933226426443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,64,0.016215466459592185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,2048,0.023228800296783446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,12288,32,0.004462933540344239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,12288,32,0.01634880006313324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,1536,0.018979199727376304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,1024,0.014398933450380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,65536,0.4020938555399577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,16384,0.10938133398691814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,65536,0.263645871480306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,768,0.012371200323104858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,16384,0.08600107034047445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,12288,0.08277119795481364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,12288,0.06453440189361573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,10240,0.06889066696166993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,10240,0.056797866026560465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,256,0.00867306689421336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,8192,0.05599040190378825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,128,0.007709866762161255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,8192,0.04808533191680908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,8192,0.07677120367685954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,7168,0.05412693421045939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,7168,0.04440000057220459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,6144,0.04643413225809733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,6144,0.040507733821868896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,12288,512,0.010523733496665955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,16384,0.1336352030436198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,65536,0.49441280364990237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,5120,0.03953386545181274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,5120,0.03650346597035726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,4096,0.03414933284123738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,12288,0.10336533387502034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,4096,0.03249920010566711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,3584,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,3584,0.03078719973564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,3072,0.02547093431154887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,3072,0.028035199642181395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,10240,0.08702399730682372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,2560,0.02104533314704895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,2560,0.025654399394989015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,2048,0.018660267194112144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,2048,0.023685334126154582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,7168,0.06305493513743082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,1536,0.013750400145848593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,1536,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,1536,0.017407999436060587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,6144,0.058568533261617026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,1024,0.010106666882832845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,1024,0.019942400852839152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,768,0.009480533003807069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,5120,0.047890134652455646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,768,0.01838399966557821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,768,0.011914666493733723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,512,0.007281066477298736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,3584,0.0354912002881368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,512,0.01747093399365743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,256,0.005312000215053558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,3072,0.030587732791900635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,256,0.016200533509254454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,2560,0.02667413353919983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,128,0.004769066472848257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,128,0.015738667050997416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,64,0.004308266441027323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,64,0.01599679986635844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,10240,32,0.00439573327700297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,2048,0.022104533513387044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,10240,32,0.015986133615175882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,65536,0.3093461354573568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,65536,0.21124053001403809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,16384,0.09530773162841796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,16384,0.06871360143025716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,4096,0.04005973339080811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,1024,0.013275733590126038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,12288,0.06406933466593424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,12288,0.05357866684595743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,10240,0.05388480027516683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,10240,0.047364266713460286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,512,0.00944533348083496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,8192,0.04367466767628987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,8192,0.04317653179168701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,256,0.0076885332663853955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,7168,0.038950399557749434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,10240,128,0.007050666709740956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,7168,0.03860586484273275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,6144,0.0340394655863444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,6144,0.034302934010823564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,5120,0.0292138675848643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,5120,0.029741867383321123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,5120,0.030203733841578168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,65536,0.3209514617919922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,4096,0.02438186605771383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,4096,0.027501867214838667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,16384,0.08894080320994059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,3584,0.021793067455291748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,12288,0.06606719891230264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,3584,0.02510506709416707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,3584,0.024065067370732628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,3072,0.01925440033276876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,10240,0.05566080013910929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,3072,0.02352213263511658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,2560,0.01539306640625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,2560,0.02262399991353353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,2048,0.012634666760762534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,2048,0.02119893431663513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,7168,0.041739734013875325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,1536,0.010496000448862713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,1536,0.020026665925979615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,1536,0.013639466961224875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,6144,0.03673706849416097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,1024,0.008307200173536937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,1024,0.017729065815607705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,768,0.007386666536331177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,768,0.017147733767827352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,4096,0.02648319999376933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,512,0.0057888001203536986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,512,0.016403200229008992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,8192,0.04643839995066325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,3072,0.021406932671864828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,256,0.00450133333603541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,256,0.01567039986451467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,2560,0.018614399433135986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,128,0.003945599993069967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,128,0.015304533640543619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,128,0.006748799979686737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,64,0.003659733384847641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,2048,0.016504533092180886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,64,0.015718400478363037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,8192,32,0.0040394666294256846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,8192,32,0.015613866845766702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,1024,0.011096533139546711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,65536,0.28693227767944335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,16384,0.08025706609090169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,65536,0.19968640009562175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,16384,0.07431146303812662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,12288,0.0871999979019165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,768,0.01002346674601237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,12288,0.052121599515279136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,10240,0.05866026480992635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,10240,0.046136534214019774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,10240,0.05524906714757284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,8192,0.04734293222427368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,512,0.008373333017031352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,8192,0.04037653207778931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,7168,0.03374079863230388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,7168,0.03691946665445964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,7168,0.03869866530100505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,6144,0.029901866118113202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,6144,0.03341439962387085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,5120,0.025546665986378985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,5120,0.03113066752751668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,5120,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,4096,0.02164586583773295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,4096,0.027490133047103883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,3584,0.019678932428359986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,3584,0.025300266345342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,16384,0.0881066640218099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,65536,0.31759465535481773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,3072,0.017077332735061644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,3072,0.02453119953473409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,12288,0.06350613435109456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,3072,0.020412800709406535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,2560,0.01527466674645742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,2560,0.022932267189025878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,8192,256,0.007478400071461995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,2048,0.012634666760762534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,2048,0.02145919998486837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,2048,0.01495146652062734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,1536,0.010431999961535137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,8192,0.04501653512318929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,1536,0.02035413384437561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,1536,0.012487467130025227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,1024,0.008393599589665731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,1024,0.018326399723688762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,768,0.007231999933719635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,768,0.0171615997950236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,512,0.005766400198141734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,512,0.016552533706029257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,256,0.0046293333172798155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,4096,0.02502506573994954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,256,0.01597546637058258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,128,0.0042133331298828125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,6144,0.035358933607737224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,128,0.015281066298484802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,3584,0.022884267568588256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,128,0.006006399790445963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,64,0.003931733220815659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,64,0.015586133797963461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,7168,32,0.0042303999265035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,7168,32,0.015770666797955833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,2560,0.017773866653442383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,65536,0.23206613858540853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,65536,0.17476372718811034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,16384,0.07098346551259359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,16384,0.062458666165669766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,12288,0.057309865951538086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,16384,0.07777386506398519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,12288,0.05256533225377401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,1024,0.010012800494829815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,10240,0.05003626743952433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,10240,0.04533119996388753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,768,0.008481066425641377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,8192,0.03749653498331706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,8192,0.03876693248748779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,7168,0.029612799485524494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,7168,0.03643413384755452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,256,0.0064074665307998655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,6144,0.02613866726557414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,6144,0.03289706707000732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,6144,0.03228586713473002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,5120,0.02314773400624593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,5120,0.028961066404978437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,5120,0.027195733785629273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,4096,0.021290665864944457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,4096,0.025916800896326704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,65536,0.2988618532816569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,3584,0.019483733177185058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,12288,0.05970880190531412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,3584,0.025010132789611818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,7168,512,0.007492266595363617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,3072,0.016266666849454246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,10240,0.04856853485107422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,3072,0.023414399226506552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,2560,0.013230933745702108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,8192,0.04281066656112671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,2560,0.021563732624053956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,2048,0.011243733763694762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,2048,0.02084160049756368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,1536,0.00955839951833089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,1536,0.019410133361816406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,4096,0.023845332860946655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,1024,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,1024,0.017594667275746663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,768,0.006348800162474315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,3584,0.021074134111404418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,768,0.017142399152119955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,768,0.008243200182914735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,512,0.005111466844876607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,512,0.016105600198109946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,512,0.007398400207360585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,3072,0.01909760038057963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,7168,0.037411201000213626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,256,0.004177066683769226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,2560,0.016948266824086507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,128,0.00388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,256,0.015960533420244852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,256,0.00647680014371872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,128,0.015031466881434122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,64,0.015317333738009134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,2048,0.014735999703407287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,64,0.003565866748491923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,6144,32,0.003789866715669632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,6144,32,0.015227733055750528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,65536,0.20210240681966146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,65536,0.15681173006693522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,16384,0.05813759962717692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,16384,0.05374079942703247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,12288,0.04469653367996216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,12288,0.04357759952545166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,12288,0.05649813413619995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,10240,0.03788479963938395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,10240,0.03889919916788737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,1024,0.009501866499582927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,8192,0.03941973447799683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,8192,0.03496319850285848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,7168,0.03034026622772217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,7168,0.03199359973271688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,6144,0.026443733771642046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,6144,0.028952533006668092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,128,0.005884799857934316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,6144,1536,0.01220373312632243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,5120,0.022637865940729775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,5120,0.026608000199000042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,4096,0.01932800014813741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,4096,0.024892799059549966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,16384,0.0761141300201416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,3584,0.016914133230845133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,10240,0.048441600799560544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,3584,0.023685334126154582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,8192,0.03986133337020874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,3072,0.015078399578730264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,3072,0.022131200631459555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,7168,0.035097599029541016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,2560,0.013061333696047464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,2560,0.021342933177947998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,2048,0.012315733234087627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,6144,0.030598400036493938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,2048,0.020067199071248373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,1536,0.010151466727256775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,1536,0.01797653237978617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,5120,0.02595733404159546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,1024,0.007495466868082683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,65536,0.2693642616271973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,1024,0.016884267330169678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,768,0.005939200023810069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,768,0.016445866227149962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,3584,0.020451200008392335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,512,0.00489386667807897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,512,0.0160970667997996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,3072,0.018101332585016887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,512,0.0068906664848327635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,256,0.0039594667653242745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,256,0.015273599823315939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,2560,0.01586133340994517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,128,0.003689600030581156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,128,0.01516800026098887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,128,0.005496533215045929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,2048,0.013486933708190919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,64,0.0033439998825391137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,64,0.014924800395965577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,5120,32,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,5120,32,0.014878933628400167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,1536,0.01107413371404012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,4096,0.02223466634750366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,65536,0.16408853530883788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,65536,0.13701012929280598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,16384,0.0506933331489563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,16384,0.057361066341400146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,1024,0.009305600325266521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,16384,0.06518293221791585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,12288,0.04648960034052531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,12288,0.04373013178507487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,768,0.007772799829641979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,10240,0.038995198408762616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,10240,0.039586134751637775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,8192,0.03698773384094238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,8192,0.033794132868448894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,7168,0.030078933636347456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,7168,0.030308266480763752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,5120,256,0.005897599955399831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,6144,0.02434026598930359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,6144,0.027926399310429888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,5120,0.020526933670043945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,5120,0.024436267217000325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,65536,0.24039467175801596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,4096,0.017760000626246133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,12288,0.052073601881663004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,4096,0.022695465882619222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,10240,0.044309333960215254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,3584,0.015867732961972556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,3584,0.022193066279093423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,3072,0.01308799982070923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,3072,0.020801067352294922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,2560,0.010947199662526448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,2560,0.019789866606394448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,7168,0.03268373409907023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,2048,0.009338666995366413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,6144,0.027508266766866046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,2048,0.01812053322792053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,1536,0.007732266684373219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,1536,0.017435733477274576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,5120,0.02368639906247457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,1024,0.00614933321873347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,1024,0.016717867056528727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,4096,0.02118399937947591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,8192,0.035582931836446126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,3584,0.019729065895080566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,768,0.00528106689453125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,768,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,768,0.007710933188597361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,512,0.004466133316357931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,3072,0.01733760039011637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,512,0.015867732961972556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,256,0.0038986665507157645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,256,0.015431466698646545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,2560,0.014888532956441245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,128,0.003570133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,128,0.014967466394106546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,64,0.0033269333342711128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,2048,0.013144532839457193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,64,0.015131733814875283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,4096,32,0.003572266548871994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,4096,32,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,65536,0.149730126063029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,1536,0.010428800185521444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,65536,0.13344000180562338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,16384,0.04857493241628011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,16384,0.05021440188090006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,1024,0.00843946635723114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,12288,0.03871999979019165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,12288,0.04131199916203816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,10240,0.032833067576090495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,10240,0.035361067454020186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,8192,0.03364373445510864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,8192,0.030963200330734252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,512,0.006938666601975759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,7168,0.02143146594365438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,7168,0.02792106668154399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,128,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,6144,0.025183999538421632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,6144,0.0273632009824117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,6144,0.026680533091227216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,5120,0.018272000551223754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,65536,0.24750612576802572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,5120,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,5120,0.023155200481414794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,4096,0.013774933417638144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,12288,0.05085866848627726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,4096,0.022804266214370726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,10240,0.044088534514109295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,3584,0.013688533504803976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,3584,0.022426666816075642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,3072,0.014243200421333313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,4096,256,0.006413866579532623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,3072,0.021294933557510377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,8192,0.03747626543045044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,2560,0.010982400178909302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,2560,0.020230400562286376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,2048,0.009541333715120951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,2048,0.018998400370279948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,7168,0.033003733555475874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,1536,0.008190933366616566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,1536,0.017288533846537273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,1536,0.010123733679453533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,1024,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,16384,0.06652906735738119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,1024,0.01635199983914693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,768,0.005103999873002371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,768,0.01609386702378591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,4096,0.020594133933385213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,512,0.004341333111127218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,3584,0.018687999248504637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,512,0.01574613352616628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,3072,0.01648319959640503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,512,0.006557866434256236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,256,0.0038421332836151125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,256,0.015103999773661295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,2560,0.014678399761517844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,128,0.0034261333445707956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,128,0.014949333667755128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,128,0.005351466437180838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,64,0.0031498665610949195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,64,0.01477013329664866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3584,32,0.003143466760714849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3584,32,0.015170133113861084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,65536,0.12458240191141765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,65536,0.11664746602376301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,16384,0.0400330662727356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,16384,0.04316800038019816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,768,0.0071285332242647815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,12288,0.03337706724802653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,12288,0.03672533432642619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,10240,0.0307370662689209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,10240,0.03193599979082744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,8192,0.02263360023498535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,8192,0.028572799762090047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,2048,0.012504532933235168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,7168,0.02023786703745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,256,0.005707733333110809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,7168,0.026420267422993977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,6144,0.017812265952428182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3584,1024,0.00832426647345225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,6144,0.02515946626663208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,5120,0.015332266688346863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,5120,0.025224532683690386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,65536,0.22806506156921386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,16384,0.06629653374354044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,4096,0.012973866860071816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,12288,0.051147735118865965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,4096,0.022421334187189737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,3584,0.011582932869593303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,3584,0.021233065923055013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,10240,0.04171839952468872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,3072,0.010447999835014344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,3072,0.020999467372894286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,2560,0.010203733046849569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,8192,0.0350165327390035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,2560,0.019684267044067384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,2048,0.008438400427500407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,2048,0.018487467368443807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,7168,0.031057065725326537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,2048,0.012476799885431926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,1536,0.007517866790294647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,1536,0.01739733417828878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,1536,0.009810133775075277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,1024,0.005682133138179779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,1024,0.016663466890652977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,768,0.004952533543109894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,768,0.016150400042533875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,4096,0.01968533396720886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,3584,0.018092799186706542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,6144,0.026715733607610065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,512,0.004261333247025808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,512,0.015896532932917276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,3072,0.01577279965082804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,256,0.0038122666378815973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,256,0.015521066387494406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,256,0.00552106648683548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,128,0.003519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,128,0.01474346617857615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,2560,0.01388800044854482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,64,0.003156266609827677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,64,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,3072,32,0.0032266666491826378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,3072,32,0.015191466609636942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,65536,0.10767146746317546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,65536,0.10666879812876384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,16384,0.03683306773503621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,16384,0.042423466841379806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,12288,0.02951146761576335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,5120,0.022046933571497597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,1024,0.007949866851170858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,12288,0.03356159925460815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,10240,0.026195200284322102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,10240,0.028888533512751263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,768,0.007048533360163371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,8192,0.020703999201456706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,8192,0.02676266630490621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,512,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,7168,0.018629332383473717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,7168,0.025331199169158936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,6144,0.014358400305112203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,3072,128,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,6144,0.023525333404541014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,6144,0.025973333915074663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,5120,0.012239999572436015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,5120,0.02256960074106852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,65536,0.221397336324056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,16384,0.06341760158538819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,4096,0.012284800410270691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,4096,0.02072746753692627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,3584,0.009687466422716777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,10240,0.04008320172627767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,3584,0.020500266551971437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,3584,0.017374932765960693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,3072,0.00879039963086446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,3072,0.01949013272921244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,2560,0.008408533533414204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,2560,0.017896533012390137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,7168,0.030055467287699384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,2048,0.007462400197982788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,2048,0.01760639945665995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,8192,0.03418453137079875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,1536,0.006111999849478403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,1536,0.01672640045483907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,12288,0.04757546583811442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,5120,0.02197973330815633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,1024,0.004973866542180379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,1024,0.01630400021870931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,4096,0.01912426749865214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,768,0.0043818667531013485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,768,0.015929599603017174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,512,0.0039018665750821433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,512,0.01550933321317037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,512,0.006169599791367849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,256,0.003601066768169403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,256,0.014979199568430582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,3072,0.015829333662986757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,128,0.0032437334458033243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,128,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,64,0.0029493334392706556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,64,0.014858667055765787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,2560,0.013289599617322286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2560,32,0.00310506671667099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2560,32,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,65536,0.0890186627705892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,65536,0.10187093416849773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,16384,0.02987839976946513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,1536,0.009236266215642292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,16384,0.0416757345199585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,2048,0.011602133512496948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,1024,0.007578666508197785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,12288,0.029175466299057005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,768,0.006977066894372304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,12288,0.03063039978345235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,10240,0.02449280023574829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,10240,0.02757866581281026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,8192,0.022379734118779502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,8192,0.025964800516764325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,7168,0.015170133113861084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,128,0.005093333125114441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,7168,0.024117332696914674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,6144,0.01362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,6144,0.023170133431752525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,65536,0.2016256014506022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,5120,0.012004266182581585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,5120,0.021092265844345093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,16384,0.06004266738891602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,4096,0.010472533106803895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,12288,0.04474560022354126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,4096,0.019989333550135293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2560,256,0.005619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,3584,0.009539199868837993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,3584,0.020294400056203206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,3072,0.008822400371233623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,8192,0.03062293330828349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,3072,0.018948266903559365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,7168,0.028568534056345622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,2560,0.00795413355032603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,2560,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,2048,0.006942933301130931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,6144,0.023299199342727662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,2048,0.017090133825937905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,2048,0.010803199807802836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,1536,0.005796266595522562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,1536,0.016781866550445557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,1024,0.004721066852410635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,5120,0.02119893431663513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,1024,0.01576640009880066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,768,0.004333866635958353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,10240,0.03875093460083008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,768,0.015511467059453329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,4096,0.01801919937133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,768,0.006985599795977275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,512,0.0037952000896135964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,512,0.015431466698646545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,256,0.0035455999275048576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,3584,0.016594133774439492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,256,0.014885333180427552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,128,0.0032000000278155005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,128,0.014807466665903726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,3072,0.014563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,64,0.003031466652949651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,64,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,2048,32,0.0031338666876157125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,2048,32,0.014807466665903726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,65536,0.07160533269246419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,1536,0.009059199690818786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,65536,0.08953493436177572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,1024,0.007429333527882893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,16384,0.026230400800704955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,16384,0.034670933087666826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,12288,0.021217066049575805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,12288,0.0291103998819987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,256,0.005534933507442474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,2560,0.012836266557375589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,10240,0.018181333939234413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,128,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,10240,0.027051732937494917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,10240,0.03595840136210124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,8192,0.015664000312487283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,8192,0.024858667453130086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,7168,0.014076800147692362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,7168,0.023545600970586143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,6144,0.01292693316936493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,6144,0.023127466440200806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,5120,0.012095999717712403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,5120,0.021142399311065672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,65536,0.20075626373291017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,5120,0.02071146567662557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,4096,0.0102783997853597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,16384,0.05908906857172648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,2048,512,0.006265600025653839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,4096,0.019845332702000937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,3584,0.009311999877293904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,3584,0.019332265853881835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,3072,0.008577066659927367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,12288,0.04563519954681396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,3072,0.018447999159495035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,2560,0.007260799904664357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,2560,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,2048,0.006279466549555461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,2048,0.01737920045852661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,8192,0.03149973352750142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,1536,0.005447466671466827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,7168,0.02690879901250203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,1536,0.016268799702326454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,6144,0.023565866549809775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,1024,0.0046165332198143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,1024,0.015847466389338174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,768,0.004178133110205332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,768,0.015480533242225647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,4096,0.018054399887720743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,512,0.0038005332152048744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,512,0.015059199929237366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,3072,0.014079999923706055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,256,0.0032992000381151833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,2560,0.012820266683896384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,256,0.014604799946149192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,128,0.0031093334158261614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,128,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,1536,0.008900266885757447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,64,0.0028917332490285236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,1024,0.007203199962774913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,64,0.014662399888038635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1536,32,0.003110400090614955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1536,32,0.014401066303253173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,2048,0.010670933127403259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,65536,0.0511242667833964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,3584,0.015939199924468996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,768,0.006586666901906331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,65536,0.07746666272481283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,16384,0.02190933426221212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,16384,0.030133332808812457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,12288,0.016942934195200602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,12288,0.02622399926185608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,512,0.005963733295599619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,10240,0.01367573340733846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,10240,0.023891200621922813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,8192,0.012090667088826498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,256,0.005382399757703145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,8192,0.022371200720469157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,7168,0.011218133568763732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,7168,0.0236842672030131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1536,128,0.005009066561857859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,6144,0.01046399970849355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,6144,0.02168533404668172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,5120,0.009608532985051472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,5120,0.020422399044036865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,65536,0.18043093681335448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,16384,0.051260801156361904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,4096,0.009661866227785747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,4096,0.019051732619603474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,3584,0.008739200234413148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,10240,0.03406399885813395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,3584,0.01848640044530233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,3072,0.00795839975277583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,8192,0.029266132911046343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,3072,0.01804693341255188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,7168,0.026195200284322102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,2560,0.006675200164318084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,2560,0.017386666933695474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,6144,0.02222613294919332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,2048,0.005814399818579356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,2048,0.01692053278287252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,5120,0.01937920053799947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,1536,0.005133866767088572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,1536,0.01636693378289541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,12288,0.040775465965271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,1024,0.004458666841189066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,4096,0.017001599073410034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,1024,0.015862400333086647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,768,0.004154666761557261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,3584,0.01530346671740214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,768,0.015684266885121666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,512,0.0037216000258922578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,3072,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,512,0.015180800358454385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,512,0.005859200159708659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,2560,0.011918933192888895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,256,0.0033642667035261786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,256,0.0150026669104894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,128,0.0031541332602500914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,128,0.014333867033322654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,64,0.0029813334345817565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,64,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,1024,32,0.0029088000456492106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,2048,0.010054399569829304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,1024,32,0.014320000012715658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,65536,0.041317331790924075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,65536,0.07168959776560466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,1536,0.008668800195058186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,16384,0.015973333517710367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,16384,0.027663999795913698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,12288,0.012436266740163167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,12288,0.02453546722730001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,12288,0.03236800034840902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,1024,0.007187200089295705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,10240,0.011069867014884948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,10240,0.023231999079386393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,8192,0.011183999975522359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,768,0.006369066735108693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,8192,0.0214303990205129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,7168,0.009620267152786254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,7168,0.02267199953397115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,6144,0.009347200393676758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,256,0.005162666738033295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,6144,0.021423999468485513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,1024,128,0.004882133503754934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,5120,0.008755200107892354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,5120,0.020143999656041463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,4096,0.009063466389973959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,4096,0.019042134284973145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,65536,0.15181439717610676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,16384,0.04453333218892415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,3584,0.007985066870848339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,3584,0.018820265928904213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,3072,0.007277866701285045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,3072,0.01795519987742106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,10240,0.028483200073242187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,2560,0.0064735998709996535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,8192,0.02327893376350403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,2560,0.017366399367650352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,7168,0.020778665939966835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,2048,0.005737600227197012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,2048,0.016973867019017538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,6144,0.018659200270970663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,1536,0.004914133250713349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,1536,0.01620693306128184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,1024,0.004297600189844767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,4096,0.014172800381978354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,1024,0.015901866555213928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,768,0.0038506666819254553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,768,0.015415466825167336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,3584,0.012777599692344665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,512,0.0035904000202814737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,512,0.015179733435312906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,3072,0.011410133043924967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,256,0.0031850665807724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,256,0.01437013347943624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,2560,0.010193066795667012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,128,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,5120,0.015701333681742348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,128,0.014318933089574179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,2048,0.00874773363272349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,64,0.002749866743882497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,64,0.014441600441932679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,768,32,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,1536,0.007712000111738841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,768,32,0.014643200238545737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,65536,0.03247039914131165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,65536,0.06733120282491048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,16384,0.011922132968902589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,1024,0.006502399841944377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,16384,0.025546665986378985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,12288,0.010150399804115296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,12288,0.022946133216222128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,10240,0.00944213370482127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,768,0.005937066674232483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,10240,0.02245546579360962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,8192,0.008573866883913676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,512,0.0054666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,8192,0.021669334173202513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,256,0.005038933455944061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,7168,0.007939200103282928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,7168,0.020426666736602782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,65536,0.12115519841512043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,16384,0.03331306576728821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,6144,0.007396266857783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,12288,0.02477333347002665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,6144,0.019847466548283895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,6144,0.014200533429781595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,5120,0.008282666901747386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,5120,0.019742933909098308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,10240,0.020922666788101195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,4096,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,4096,0.018628267447153728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,4096,0.010777599612871806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,3584,0.0070250665148099255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,8192,0.017774933576583864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,3584,0.01834239959716797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,3072,0.006636799871921539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,768,128,0.005004799862702688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,3072,0.009079466263453167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,3072,0.017667200167973837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,2560,0.00633493314186732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,2560,0.008468266328175862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,2560,0.017208532492319743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,2048,0.0056096002459526065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,2048,0.016613333423932394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,7168,0.015589333573977151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,1536,0.004903466502825419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,1536,0.016386133432388306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,1024,0.004192000130812327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,1024,0.015643733739852905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,768,0.003994666785001755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,768,0.015175466736157736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,768,0.005463466544946035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,512,0.0035968000690142312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,512,0.014993066589037577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,256,0.003173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,256,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,5120,0.012270933389663697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,256,0.005036800106366476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,128,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,128,0.014227199554443359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,64,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,64,0.014284800489743552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,3584,0.010396800438563029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,512,32,0.0028789333999156954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,512,32,0.014242133498191834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,65536,0.023414399226506552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,65536,0.06170453230539957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,16384,0.008478933572769165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,16384,0.025142399470011394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,2048,0.007355733215808869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,12288,0.009204266468683879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,12288,0.021899733940760294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,1536,0.007119999825954437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,10240,0.008137600123882293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,10240,0.020964266856511433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,1024,0.006016000111897787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,8192,0.00697386662165324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,8192,0.019606399536132812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,7168,0.006730666756629944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,512,0.0052597333987553915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,7168,0.020578134059906005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,7168,0.014391466975212097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,6144,0.0063296000162760425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,6144,0.020385066668192543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,5120,0.006717866659164429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,5120,0.02012053330739339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,65536,0.09879146416982015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,4096,0.0063296000162760425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,16384,0.02712000012397766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,4096,0.01864746610323588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,3584,0.006678399940331777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,12288,0.021780266364415487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,3584,0.018119466304779053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,10240,0.018820265928904213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,3072,0.006372266511122386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,3072,0.017439999183019004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,8192,0.01560640037059784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,3072,0.008754133184750875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,2560,0.006144000093142191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,2560,0.017298134167989095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,512,128,0.004929066697756449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,2048,0.00555626650651296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,2048,0.016532267133394875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,1536,0.004925866425037384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,6144,0.013032533725102744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,1536,0.01591146687666575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,1024,0.004102399945259095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,1024,0.015402666727701821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,1024,0.005771733323733012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,768,0.0038442666331926978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,5120,0.01141866644223531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,768,0.01532373329003652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,512,0.003469866762558619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,512,0.014867200454076131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,256,0.0030794667700926462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,256,0.014686933159828186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,4096,0.01033066709836324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,256,0.0048320000370343525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,3584,0.010057600339253743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,128,0.0028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,128,0.014332800110181173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,64,0.0028319999575614927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,64,0.014349866906801859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,2560,0.008133333424727123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,256,32,0.002773333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,256,32,0.0141567995150884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,1536,0.006856533388296763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,65536,0.015333333611488342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,65536,0.05675626595815023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,16384,0.007084799806276958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,16384,0.02344640096028646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,16384,0.027378133932749432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,12288,0.0063360000650088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,768,0.005352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,12288,0.02258666753768921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,512,0.005117866893609365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,10240,0.006757333377997081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,10240,0.021305600802103676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,10240,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,8192,0.0065098668138186145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,8192,0.020281600952148437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,8192,0.015686399737993875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,7168,0.0064181332786877945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,7168,0.020193066199620566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,6144,0.006111999849478403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,6144,0.020347734292348228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,6144,0.012862933675448099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,5120,0.006593066453933716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,128,0.004590933521588644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,256,2048,0.007268266876538594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,5120,0.019769599040349327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,4096,0.006088533500830332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,4096,0.018588799238204955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,3584,0.006625066697597504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,3584,0.018078933159510292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,3072,0.006337066491444905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,65536,0.09991466999053955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,3072,0.017382399241129557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,2560,0.006235733131567637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,2560,0.016873600085576375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,2560,0.008141866823037466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,2048,0.005352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,2048,0.016448000073432924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,12288,0.021633066733678184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,1536,0.004739200075467428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,1536,0.0159850666920344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,1024,0.004086400071779886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,1024,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,7168,0.01444586714108785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,768,0.003735466549793879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,768,0.0150218665599823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,5120,0.011473066608111064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,512,0.0034677334129810332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,512,0.014980266491572062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,3584,0.009934932986895243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,3072,0.00881813367207845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,256,0.0030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,256,0.01456000010172526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,128,0.0028480000793933867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,128,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,2048,0.007141333321730297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,128,0.004695466657479604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,1536,0.006975999971230824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,64,0.0027295999228954316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,128,32,0.0027317332724730173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,64,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,128,32,0.014318933089574179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,65536,0.010315733154614766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,16384,0.007066666583220164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,1024,0.0059338668982187905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,12288,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,65536,0.05208106835683187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,16384,0.023756800095240276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,4096,0.010365866621335347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,10240,0.006477866570154827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,12288,0.021362133820851645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,10240,0.02127466599146525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,8192,0.00636053333679835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,768,0.00550186683734258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,8192,0.020253866910934448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,6144,0.019308799505233766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,5120,0.006425599753856659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,7168,0.006258133550484974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,7168,0.020307199160257975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,6144,0.006026666859785716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,512,0.005110399921735128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,4096,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,5120,0.019504000743230186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,4096,0.018651733795801796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,3584,0.0064629331231117245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,3584,0.017953066031138103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,3072,0.006110933423042297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,192,128,256,0.0047989333669344585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,3072,0.017480534315109254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,2560,0.006177066763242086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,2560,0.017283199230829875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,2048,0.005331199864546458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,2048,0.016694400707880655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,1536,0.004732800026734671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,1536,0.015729066729545594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,1024,0.004045866678158442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,1024,0.015530666708946228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,768,0.0037258667250474296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,768,0.014897066354751586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,512,0.003345066557327906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,128,0.01416106621424357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,512,0.01497066617012024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,256,0.0030261332790056865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,256,0.014548266927401224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,128,0.0027776000400384264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,64,0.0027072000006834666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,64,0.01420906682809194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,64,32,0.002735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,64,32,0.014176000157992044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,65536,0.009629866480827332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,16384,0.0067775999506314594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,65536,0.05058240095774332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,16384,0.023035732905069987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,12288,0.006411733229955037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,12288,0.022352000077565513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,10240,0.006366933385531108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,10240,0.02118399937947591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,8192,0.006287999947865804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,8192,0.019603200753529868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,7168,0.006172800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,7168,0.01995519995689392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,6144,0.006067200005054474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,6144,0.020214400688807168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,5120,0.006434133152167003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,5120,0.019338667392730713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,4096,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,4096,0.018717867136001588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,3584,0.0064074665307998655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,3584,0.017851734161376955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,2048,0.005352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,3072,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,3072,0.017910399039586387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,2560,0.006053333481152853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,2560,0.017038933436075845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,2048,0.016742400328318276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,1536,0.004752000172932943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,1536,0.015793066223462424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,1024,0.004073599974314371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,256,0.003036800026893616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,256,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,1024,0.01534293293952942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,768,0.003685333331425985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,768,0.015581867098808289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,512,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,512,0.015657599767049155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,128,0.002899199972550074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,64,0.0026687999566396077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,128,0.014210133751233419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,64,0.014140799641609192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,192,32,32,0.0026837334036827086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,192,32,32,0.014393599828084311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,16384,0.5498570760091146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,16384,0.30400320688883464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,12288,0.41463146209716795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,12288,0.2355797290802002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,10240,0.3559562683105469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,10240,0.2005397319793701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,8192,0.28251094818115235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,8192,0.17024319966634113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,10240,0.43268693288167315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,7168,0.2530357360839844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,7168,0.14667840003967286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,6144,0.21310613950093588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,6144,0.1302986701329549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,5120,0.1811477343241374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,5120,0.11316053072611491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,6144,0.283242670694987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,4096,0.1474229335784912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,4096,0.09560960133870443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,3584,0.12966612974802655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,3584,0.08707199891408285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,3072,0.1138698657353719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,3072,0.0773301362991333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,12288,0.5013162612915039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,2560,0.09715306758880615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,2560,0.06786026954650878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,7168,0.27797441482543944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,2560,0.11937493483225506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,2048,0.08086613019307455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,2048,0.05809173186620077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,5120,0.23096853892008462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,1536,0.06320319970448812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,1536,0.04857600132624308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,4096,0.191372807820638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,1024,0.04541120131810506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,1024,0.04238933324813843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,3584,0.16225706736246745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,768,0.03939626614252727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,16384,0.6200960159301758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,768,0.034774398803710936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,512,0.029556266466776532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,512,0.028716800610224406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,3072,0.14238613446553547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,8192,0.37112960815429685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,512,0.035382401943206784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,256,0.01755946675936381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,256,0.024785067637761435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,256,0.030228267113367718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,128,0.011500799655914306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,128,0.02198293407758077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,64,0.009673600395520527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,64,0.022291199366251627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,65536,32,0.009550933043162029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,65536,32,0.021943465868632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,2048,0.0982911984125773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,1536,0.0728928009668986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,65536,0.5677909215291341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,65536,0.33694187800089515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,16384,0.1392170588175456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,1024,0.051368534564971924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,16384,0.10210346380869548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,12288,0.10604586601257324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,768,0.04340266784032186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,12288,0.0747925360997518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,12288,0.13316266536712645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,10240,0.08964587052663167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,10240,0.06514666477839151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,10240,0.11650666395823162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,8192,0.0723136027654012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,8192,0.056088534990946445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,8192,0.09348479906717935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,7168,0.06432746648788452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,7168,0.08674026330312093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,7168,0.049762133757273355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,6144,0.056060798962910975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,6144,0.04699519872665405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,5120,0.047620264689127605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,5120,0.04135893185933431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,4096,0.040667732556660965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,4096,0.036160000165303546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,3584,0.0353983998298645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,3584,0.03426986535390218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,3584,0.04645866552988688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,3072,0.031242666641871135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,3072,0.03178453246752421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,3072,0.04098666508992513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,2560,0.026980266968409224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,2560,0.028990934292475384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,2048,0.022397865851720176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,65536,0.6375765482584635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,2048,0.025867732365926106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,1536,0.01804373264312744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,1536,0.02293013334274292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,65536,128,0.026587732632954914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,1024,0.012850133577982583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,1024,0.020639999707539877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,6144,0.07464959621429443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,768,0.010583466291427613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,5120,0.06402240196863809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,768,0.01989439924558004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,4096,0.05103040138880412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,768,0.01493333379427592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,512,0.008437333504358928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,512,0.018683733542760213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,512,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,256,0.006446933249632518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,256,0.016617600123087564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,16384,0.17556479771931965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,128,0.004858666658401489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,128,0.01599253316720327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,2560,0.03485759894053141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,64,0.0043722664316495265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,128,0.009713066617647807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,64,0.016220800081888833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,16384,32,0.0047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,2048,0.029523199796676634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,16384,32,0.016561067104339598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,1536,0.022957867383956908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,16384,0.12022826671600342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,65536,0.42999467849731443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,16384,0.07987946669260661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,65536,0.2620896021525065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,12288,0.09528106848398844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,16384,0.15473814010620118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,65536,0.5796864191691081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,12288,0.06969599723815918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,10240,0.08907946745554605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,10240,0.0566154678662618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,8192,0.06470613479614258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,8192,0.04846186637878418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,7168,0.05250666538874308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,7168,0.044870400428771974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,6144,0.04578773180643718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,256,0.010466133554776508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,6144,0.04071573416392009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,6144,0.06814826329549153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,5120,0.039332266648610434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,5120,0.03555946747461955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,4096,0.03213653365770976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,4096,0.0314517339070638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,4096,0.047880534331003824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,3584,0.029020800193150835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,3584,0.029499733448028566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,3584,0.04207253456115723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,16384,1024,0.017237333456675212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,3072,0.02572480042775472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,3072,0.03725546598434448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,3072,0.02713279922803243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,2560,0.0210591991742452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,12288,0.1241429328918457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,2560,0.024794665972391765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,10240,0.10411946773529053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,2048,0.017564799388249716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,2048,0.022835199038187662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,2048,0.026405332485834758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,1536,0.013857066631317139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,8192,0.08979199727376302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,1536,0.021166932582855225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,1024,0.010216533144315084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,1024,0.019529600938161217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,768,0.008699733018875121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,768,0.01867626706759135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,512,0.007191466788450877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,5120,0.05728853146235148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,512,0.016635732849438985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,256,0.0049216002225875854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,256,0.016139733791351318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,128,0.004221866528193155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,128,0.01567466656366984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,2560,0.03231786688168843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,128,0.007816533247629803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,7168,0.08033173084259033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,64,0.003924266745646795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,64,0.01575573285420736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,12288,32,0.0042357335488001505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,12288,32,0.015969066818555196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,1536,0.020359466473261513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,16384,0.10714773337046306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,768,0.013326932986577352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,65536,0.38836905161539714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,65536,0.23436373074849448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,16384,0.08328426678975423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,512,0.01076693336168925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,12288,0.09406720002492269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,12288,0.06386133432388305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,12288,0.1120031992594401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,10240,0.0727509339650472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,10240,0.05595946709314982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,256,0.008618666728337606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,10240,0.10192639827728271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,8192,0.06125760078430176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,8192,0.048870400587717695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,8192,0.08704000314076742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,7168,0.048596266905466715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,7168,0.04431999921798706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,6144,0.04752106666564941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,6144,0.03925120035807292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,5120,0.039876266320546465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,5120,0.03349440097808838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,12288,1024,0.01590506633122762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,4096,0.033463466167449954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,4096,0.029649066925048827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,4096,0.044760533173878986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,3584,0.027371732393900554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,3584,0.02759360074996948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,3072,0.024152533213297526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,3072,0.025652267535527545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,65536,0.5480597178141277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,16384,0.15385600725809734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,2560,0.018901334206263224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,2560,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,2048,0.01590079963207245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,2048,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,1536,0.01269653340180715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,1536,0.021035732825597127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,1536,0.019261866807937622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,7168,0.0748042662938436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,1024,0.009929600358009338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,6144,0.06447466611862182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,1024,0.019189333915710448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,1024,0.014699733257293702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,768,0.008386133114496867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,768,0.018462934096654258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,5120,0.055447467168172206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,512,0.007118933399518331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,512,0.01708586613337199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,512,0.009562666217486065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,256,0.0050335998336474095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,256,0.015821866194407144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,128,0.004257066547870636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,128,0.01564906636873881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,64,0.004177066683769226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,64,0.01543786625067393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,2560,0.029628799359003706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,10240,32,0.004469333092371622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,10240,32,0.015738667050997416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,2048,0.024726400772730507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,65536,0.2935082753499349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,65536,0.1937152067820231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,65536,0.33493121465047204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,16384,0.07828053633371988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,16384,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,12288,0.05526293516159057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,768,0.012421333789825439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,12288,0.04734293222427368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,3584,0.040565331776936844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,12288,0.07482986450195313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,10240,0.04668266773223877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,3072,0.035767467816670735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,10240,0.04291733503341675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,8192,0.03823893467585246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,256,0.007684266567230225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,8192,0.03761066595713298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,8192,0.051617066065470375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,10240,128,0.007031466563542683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,7168,0.03346560001373291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,7168,0.033989334106445314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,7168,0.04737173318862915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,6144,0.02956693371136983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,6144,0.03173226714134216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,6144,0.03991146485010783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,5120,0.025803732872009277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,5120,0.029049599170684816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,4096,0.02536853353182475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,5120,0.03411200046539307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,3584,0.019388800859451293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,4096,0.02201813260714213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,3584,0.024465066194534302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,3072,0.017190400759379068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,3072,0.022807466983795165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,2560,0.015015467007954916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,16384,0.09624959627787272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,2560,0.021849600474039714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,2048,0.01232319970925649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,2048,0.02042986750602722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,2048,0.0172650674978892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,1536,0.010060800115267436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,1536,0.01956053376197815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,1024,0.008006399869918824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,10240,0.062410668532053626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,1024,0.01739733417828878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,768,0.007073066631952922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,768,0.0166485329469045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,512,0.005514666438102722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,512,0.016127999623616537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,4096,0.029497599601745604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,256,0.004350933432579041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,256,0.01539413332939148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,3072,0.022921599944432578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,128,0.003869866579771042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,128,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,128,0.00682773341735204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,64,0.0035445332527160645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,1536,0.014365866780281067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,64,0.015390933553377787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,8192,32,0.003914666672547659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,1024,0.011541333794593812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,8192,32,0.015607466300328573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,768,0.010123733679453533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,65536,0.27166932423909507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,65536,0.18131200472513836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,16384,0.06492693424224853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,512,0.008296533425649007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,16384,0.058100267251332605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,16384,0.08897919654846191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,12288,0.05027733246485392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,3584,0.026501333713531493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,12288,0.04518826802571614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,10240,0.04261546532313029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,10240,0.040174933274586995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,2560,0.020515199502309164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,8192,0.034644265969594315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,8192,0.03502613306045532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,7168,0.0313045342763265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,8192,256,0.00746559997399648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,7168,0.03279146750768026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,6144,0.027590399980545043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,6144,0.03102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,6144,0.038199468453725176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,5120,0.024036266406377158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,5120,0.027485867341359455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,4096,0.020232532421747842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,4096,0.024745599428812663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,4096,0.028321067492167156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,3584,0.017794134219487508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,3584,0.023458133141199745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,3072,0.016148266196250916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,65536,0.3330933252970377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,3072,0.022498132785161336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,2560,0.013876266280810037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,12288,0.06874346733093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,2560,0.02159893314043681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,2048,0.011497599879900615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,8192,0.04919786850611369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,2048,0.020221867163976035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,1536,0.009682133793830872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,7168,0.04241173267364502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,1536,0.019495467344919838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,1024,0.007810133198897044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,5120,0.03256426652272542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,1024,0.017064533631006875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,3584,0.024606933196385704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,768,0.00680320014556249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,768,0.01667733391125997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,10240,0.058317867914835606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,768,0.008709333340326945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,3072,0.021499733130137123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,512,0.005303466816743215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,512,0.01598186691602071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,2560,0.019250132640202842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,512,0.007635200023651123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,256,0.00439573327700297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,256,0.01528320014476776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,256,0.006497066716353099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,128,0.0037109332780043284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,2048,0.01596799989541372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,128,0.005898666878541311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,128,0.0151829332113266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,64,0.003420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,64,0.015094400445620219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,7168,32,0.0035807999471823373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,1536,0.012974933783213297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,7168,32,0.014962133765220643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,65536,0.21835519472757975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,7168,1024,0.010228266318639118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,16384,0.0670186678568522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,65536,0.15865920384724935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,16384,0.052655998865763345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,16384,0.08805867036183676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,12288,0.057886934280395506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,10240,0.03865706523259481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,12288,0.043611733118693034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,12288,0.06578986644744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,10240,0.04645546674728394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,8192,0.03221653302510579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,8192,0.03477226495742798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,7168,0.028997333844502766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,7168,0.03207040031750997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,6144,0.025593600670496625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,6144,0.029198932647705077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,6144,0.03592853148778279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,5120,0.022222934166590373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,5120,0.026837333043416338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,5120,0.03128746747970581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,4096,0.01850773294766744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,4096,0.024090667565663658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,4096,0.02567360003789266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,3584,0.016701867183049522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,3584,0.02300693392753601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,3072,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,3072,0.022165334224700926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,3072,0.021105066935221354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,2560,0.012795733412106833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,2560,0.021385600169499717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,65536,0.3156576156616211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,2048,0.01099626620610555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,2048,0.02026559909184774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,2048,0.015612799922625223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,1536,0.009253333012262981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,1536,0.018934400876363118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,1536,0.012853333353996277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,1024,0.0075765331586201985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,10240,0.05766613483428955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,1024,0.017488000790278117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,768,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,768,0.016522666811943053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,768,0.008506666620572407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,512,0.005002666513125102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,7168,0.04114453395207723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,512,0.01623466710249583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,256,0.004321066538492838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,256,0.015337600310643514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,256,0.006348800162474315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,128,0.0038986665507157645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,128,0.014830933014551798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,64,0.003549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,64,0.015017599860827128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,6144,32,0.003605333218971888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,3584,0.0237226665019989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,6144,32,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,65536,0.18949012756347655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,65536,0.14068160057067872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,2560,0.01873706579208374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,16384,0.05960853497187296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,16384,0.0526581327120463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,12288,0.04502293268839518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,8192,0.04638933340708415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,12288,0.043435732523600265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,1024,0.009973333279291789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,10240,0.039678935209910074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,10240,0.039662933349609374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,8192,0.03396799961725871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,8192,0.0346997340520223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,512,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,7168,0.0271999994913737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,7168,0.031996800502141313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,6144,0.023481599489847817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,6144,128,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,6144,0.02963520089785258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,5120,0.023143466313680014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,5120,0.025705599784851076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,65536,0.29217494328816734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,16384,0.08342293103535971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,5120,0.02917120059331258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,4096,0.018565332889556883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,4096,0.02344533403714498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,4096,0.024986666440963746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,3584,0.01831573247909546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,3584,0.023086933294932048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,3072,0.014312533537546792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,10240,0.05407786766688029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,3072,0.021371734142303467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,3072,0.01965226729710897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,8192,0.04402986764907837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,2560,0.011947733163833619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,2560,0.021042132377624513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,2048,0.010412800312042236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,2048,0.01973973313967387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,1536,0.009085866808891296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,1536,0.018987733125686645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,6144,0.03407040039698283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,1024,0.0074325333038965866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,1024,0.016889599959055583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,12288,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,1024,0.009512533744176228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,768,0.006142933170000712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,768,0.016390400131543477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,512,0.004971733192602793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,512,0.01613866686820984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,256,0.004172799984614054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,256,0.015321600437164306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,3584,0.02258346676826477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,256,0.006077866752942403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,7168,0.04033386707305908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,128,0.003894400099913279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,128,0.014935466647148132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,128,0.005654400090376536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,64,0.003458133339881897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,2560,0.01744640072186788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,64,0.01502293348312378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,5120,32,0.003722666700681051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,5120,32,0.015010133385658264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,2048,0.014647466937700906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,65536,0.1518346627553304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,65536,0.1222879966100057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,16384,0.04378666480382283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,16384,0.04575573205947876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,1536,0.012119467059771221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,16384,0.07368533611297608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,12288,0.035196801026662186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,12288,0.03851093451182048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,10240,0.031247999270757037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,10240,0.03552746772766113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,8192,0.027373866240183516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,8192,0.03419946829477946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,7168,0.02453546722730001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,7168,0.030741333961486816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,512,0.006887466708819072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,6144,0.022210133075714112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,6144,0.028615466753641766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,5120,0.019754666090011596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,5120,0.023769599199295045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,12288,0.05687893231709799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,5120,768,0.008050133287906647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,4096,0.019246933857599895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,10240,0.04821226596832275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,4096,0.021512534221013388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,4096,0.02296746571858724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,3584,0.01708266735076904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,8192,0.037980798880259195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,3584,0.0216213325659434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,3072,0.014364799857139588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,7168,0.03448853492736816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,3072,0.020593067010243736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,6144,0.030112000306447347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,5120,0.02595626711845398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,2560,0.010039466619491576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,2560,0.01957226594289144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,65536,0.26445120175679526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,2048,0.00877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,2048,0.01853013237317403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,1536,0.0073183998465538025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,1536,0.011366400122642516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,1536,0.017123200496037803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,1024,0.016037333011627197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,1024,0.005568000177542368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,1024,0.008850133419036866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,768,0.00521066685517629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,768,0.015875200430552162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,512,0.004375466704368591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,3584,0.02113599975903829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,512,0.016075733304023742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,512,0.00719893326361974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,256,0.0037418665985266366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,256,0.015224533279736838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,128,0.003454933315515518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,128,0.014662399888038635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,64,0.0032138665517171226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,64,0.015007999539375306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,4096,32,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,2560,0.016499200463294984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,4096,32,0.014963199694951376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,65536,0.14607572555541992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,65536,0.11461439927419026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,2048,0.013961600263913474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,16384,0.039825065930684404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,16384,0.04289066791534424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,12288,0.03267733256022136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,12288,0.03705173333485921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,768,0.008026666442553202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,12288,0.059572267532348636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,3072,0.018090667327245076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,10240,0.028995199998219805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,10240,0.03365333477656047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,8192,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,8192,0.03134719928105672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,8192,0.041154134273529056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,7168,0.022711465756098427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,128,0.005847466488679251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,7168,0.029794132709503172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,6144,0.020595200856526694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,6144,0.02818453311920166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,5120,0.015267200271288552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,5120,0.021939200162887574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,4096,0.012666666507720947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,65536,0.2673407872517904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,4096,0.02150613268216451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,16384,0.07655146916707357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,4096,0.02299519975980123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,3584,0.011387733618418376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,3584,0.020039467016855876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,3072,0.01037013332049052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,3072,0.019336533546447755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,4096,256,0.006437333424886067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,2560,0.008906666437784832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,2560,0.016251732905705772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,2560,0.019052799542744955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,2048,0.007868800063927968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,2048,0.017334399620691936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,1536,0.006954666475454967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,7168,0.037699198722839354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,1536,0.016795732577641807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,6144,0.031700267394383745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,1024,0.005275733272234599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,1024,0.016157866517702738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,768,0.004459733267625173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,5120,0.0273087998231252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,768,0.01598186691602071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,512,0.003997866561015447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,512,0.015378133455912272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,3584,0.020987733205159505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,10240,0.0495743989944458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,256,0.003573333223660787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,3072,0.018436266978581747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,256,0.015095466375350952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,128,0.0033429334561030067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,128,0.014989866813023885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,64,0.003221333275238673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,2048,0.01339413324991862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3584,32,0.0030741333961486817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,64,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3584,32,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,1536,0.010994133353233338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,65536,0.11784319877624512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,65536,0.10640959739685059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,16384,0.03824319839477539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,16384,0.0413973331451416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,12288,0.030963200330734252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,768,0.007542400062084198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,12288,0.03519146839777629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,10240,0.026586665709813433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,512,0.00647573322057724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,10240,0.03155519962310791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,8192,0.02296853264172872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,8192,0.02845226724942525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,256,0.005817600091298421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,8192,0.04124053319295247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,128,0.005346133311589559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,7168,0.020809600750605263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,7168,0.026893866062164307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,6144,0.018866133689880372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,6144,0.02561066746711731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,5120,0.01673706571261088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3584,1024,0.008663466572761536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,5120,0.021564799547195434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,4096,0.014569600423177084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,65536,0.26206293106079104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,4096,0.02051946719487508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,3584,0.012567466497421265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,3584,0.019886932770411172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,12288,0.053514667352040614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,3072,0.011081600189208984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,3072,0.020111999909083047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,10240,0.04871360063552856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,3072,0.0180074671904246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,2560,0.008471467097600301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,2560,0.017821866273880004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,2048,0.007732266684373219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,2048,0.017384533087412515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,7168,0.03528853257497151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,1536,0.0065760001540184024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,6144,0.029666133721669513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,1536,0.017063466707865398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,16384,0.07470400333404541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,1024,0.004956800242265066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,1024,0.016030933459599814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,4096,0.022938666741053264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,768,0.004554666578769684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,3584,0.020387200514475505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,768,0.015513599912325541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,512,0.00402453343073527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,512,0.015642666816711427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,2560,0.01602026621500651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,256,0.0035402665535608927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,256,0.015947733322779337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,2048,0.01304746667544047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,128,0.0031957333286603295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,5120,0.026115200916926068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,128,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,64,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,64,0.014738133549690247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,1536,0.010501333077748616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,3072,32,0.0030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,3072,32,0.014774399995803832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,65536,0.10028479894002278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,65536,0.09794452985127768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,16384,0.031972267230351764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,16384,0.038254932562510176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,12288,0.0258026659488678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,768,0.007276799778143566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,12288,0.031352533896764116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,10240,0.022564266125361124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,512,0.006258133550484974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,10240,0.029269333680470782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,8192,0.019339734315872194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,8192,0.02763306697209676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,256,0.0056085333228111265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,7168,0.01756160060564677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,7168,0.025753599405288697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,1024,0.008373333017031352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,6144,0.015668267011642457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,6144,0.023909332354863484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,65536,0.24571305910746255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,16384,0.06657493511835734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,5120,0.015101866920789084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,5120,0.02164693276087443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,12288,0.05443946520487467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,4096,0.01323946714401245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,4096,0.02039573391278585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,4096,0.02213866710662842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,3584,0.011619200309117634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,3584,0.019858133792877198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,10240,0.048000001907348634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,3072,0.01071679989496867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,3072,0.018949333826700845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,3072,128,0.005294933418432872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,2560,0.00839466651280721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,8192,0.0391264001528422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,2560,0.018169599771499633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,2048,0.00751146674156189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,2048,0.017299199104309083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,7168,0.03314666748046875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,1536,0.0060597335298856105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,1536,0.016229333480199178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,1024,0.004956800242265066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,1024,0.01566506624221802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,5120,0.025526400407155352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,768,0.004403199752171834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,768,0.015660799543062844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,3584,0.019860267639160156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,512,0.003913599997758865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,3072,0.01779306729634603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,512,0.015390933553377787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,2560,0.01577600042025248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,256,0.00352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,256,0.014915200074513755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,2048,0.012597333391507468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,128,0.0033098667860031127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,128,0.014683733383814493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,128,0.005287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,6144,0.0304639995098114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,64,0.003018666555484136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,64,0.014769066373507181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2560,32,0.003186133255561193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2560,32,0.014682666460673014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,1024,0.008198399841785432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,65536,0.07999040285746256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,65536,0.08666773637135824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,16384,0.026408533255259197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,65536,0.23920532862345376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,16384,0.03429226477940877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,12288,0.021297067403793335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,12288,0.028893866141637164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,10240,0.018810667594273887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,768,0.0072405333320299785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,10240,0.027423999706904095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,8192,0.01607039968172709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,8192,0.025547732909520466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,8192,0.03552853266398112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,7168,0.014298666516939798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,7168,0.023717333873113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,512,0.006355200211207073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,1536,0.010450133681297302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,6144,0.013027200102806091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2560,256,0.00553706685702006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,6144,0.022818134228388468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,6144,0.026391466458638508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,5120,0.013767466942469279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,5120,0.021625600258509316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,4096,0.011723732948303223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,4096,0.019898666938145956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,3584,0.010283733407656353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,3584,0.06582186619440714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,12288,0.05125333468119303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,3072,0.009333333373069764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,10240,0.04321493307749431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,3072,0.01829973260561625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,2560,0.007970133423805236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,2560,0.017605332533518474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,7168,0.030138667424519854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,2048,0.007020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,2048,0.01685439944267273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,5120,0.02365013360977173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,1536,0.0056757330894470215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,1536,0.01625279982884725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,1536,0.009818666179974874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,4096,0.020256000757217407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,1024,0.004448000093301137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,16384,0.0657482663790385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,1024,0.01600320041179657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,768,0.004208000004291534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,768,0.01607360045115153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,512,0.003917866696914038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,512,0.015427199999491372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,256,0.003489066660404205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,256,0.014814933141072592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,3072,0.016038399934768677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,128,0.00310506671667099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,128,0.014535466829935709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,2560,0.014325333635012307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,64,0.002940800040960312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,64,0.014765866597493491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,2048,0.011891200145085653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,2048,32,0.003171200056870779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,2048,32,0.014910933375358582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,65536,0.06688106854756673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,65536,0.08087893327077231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,1024,0.007742933432261149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,3584,0.018962132930755615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,16384,0.022171733776728313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,16384,0.029921066761016846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,768,0.007063466807206471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,12288,0.017463467518488564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,12288,0.03041386604309082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,10240,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,10240,0.02741439938545227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,8192,0.013265066345532737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,8192,0.025111466646194458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,128,0.005130666494369507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,7168,0.012311466534932454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,7168,0.024974934260050454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,512,0.006337066491444905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,6144,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,65536,0.23093867301940918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,6144,0.02285226583480835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,5120,0.01321386694908142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,12288,0.051668266455332436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,5120,0.01996586720148722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,2048,256,0.005660800139109293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,4096,0.011310933033625285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,10240,0.04511466821034749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,4096,0.019621332486470543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,3584,0.010259200135866802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,3584,0.018985599279403687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,8192,0.038320000966389975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,3072,0.00951039989789327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,3072,0.01806933283805847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,2560,0.0074432000517845156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,2560,0.017478400468826295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,7168,0.03121066689491272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,2048,0.005866666634877523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,2048,0.01688213348388672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,16384,0.06583253145217896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,1536,0.0053951998551686605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,6144,0.029747200012207032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,1536,0.016424533724784852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,1024,0.004588800172011057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,5120,0.024231467644373575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,1024,0.015957333644231162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,4096,0.021182932456334434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,768,0.004262400170167288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,768,0.015195733308792115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,3584,0.018703999121983846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,512,0.0037205333511034647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,512,0.015077333648999533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,256,0.0033781332274278007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,256,0.014813866217931113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,2560,0.014135467012723288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,128,0.0030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,128,0.01453013320763906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,128,0.004940799872080485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,2048,0.012054399649302164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,64,0.0028629332780838014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,1536,0.00985599954922994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,64,0.01469546655813853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1536,32,0.0032000000278155005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1536,32,0.014634666840235391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,65536,0.04419413407643636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,1024,0.007787733276685078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,65536,0.06784319877624512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,16384,0.01593066652615865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,3072,0.01662506659825643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,65536,0.20845972696940102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,16384,0.02910826603571574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,12288,0.013792000214258828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,512,0.006214400132497152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,12288,0.025441066424051924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,10240,0.01414293348789215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,10240,0.025614933172861738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,8192,0.012707199652989706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,8192,0.02347093423207601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,256,0.0052928000688552855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,7168,0.01195733348528544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,7168,0.02259413401285807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,6144,0.012500266234079996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,6144,0.02132479945818583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1536,768,0.006986666719118755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,5120,0.011098666985829671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,5120,0.019748266537984213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,16384,0.06133333444595337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,4096,0.009691733121871948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,4096,0.018905599912007652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,12288,0.04727146625518799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,3584,0.00888213316599528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,3584,0.018544000387191773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,10240,0.040705064932505294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,3072,0.007890133559703827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,3072,0.01834133267402649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,2560,0.00690773328145345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,7168,0.030187733968098956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,2560,0.01738026738166809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,2048,0.005830400188763936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,2048,0.01688213348388672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,6144,0.026100265979766845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,2048,0.011270399888356526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,1536,0.005162666738033295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,1536,0.016619732975959776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,1536,0.00946453313032786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,1024,0.0046410664916038515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,1024,0.01572266618410746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,1024,0.007718400160471599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,768,0.004161066561937332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,768,0.015732266505559287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,512,0.003684266656637192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,5120,0.02226346731185913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,512,0.014839466412862143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,8192,0.03357013463973999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,256,0.0033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,256,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,3584,0.016862932840983072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,128,0.0030378667016824085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,128,0.014200533429781595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,3072,0.014903466900189719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,64,0.002845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,64,0.014617600043614707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,1024,32,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,1024,32,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,2560,0.013107200463612875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,65536,0.039539198080698654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,65536,0.06355199813842774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,16384,0.013286399841308593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,16384,0.028775467475255327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,12288,0.016302933295567833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,12288,0.024309333165486655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,768,0.006814933319886525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,4096,0.01950826644897461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,10240,0.013792000214258828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,10240,0.023129600286483764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,512,0.006099199752012888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,8192,0.012170666456222534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,8192,0.02109439969062805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,256,0.005313066641489664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,7168,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,7168,0.020555732647577922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,6144,0.010620799660682679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,65536,0.18329920768737792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,5120,0.009221333265304565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,16384,0.051585066318511966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,5120,0.019453867276509603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,4096,0.008106666803359985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,4096,0.018885332345962524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,12288,0.03870720068613688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,3584,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,3584,0.018278400103251137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,3584,0.014217600226402283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,10240,0.03303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,3072,0.006971733272075653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,1024,128,0.004987733562787374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,3072,0.017641599973042807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,2560,0.006596266726652781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,2560,0.01722986698150635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,7168,0.02480640014012655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,2048,0.005633066594600678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,2048,0.016846932967503867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,6144,0.02127573291460673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,2048,0.00950933297475179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,1536,0.004994133114814758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,1536,0.016081066926320393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,1024,0.004312533140182495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,5120,0.018439465761184694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,1024,0.015485866864522298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,768,0.003923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,4096,0.01609599987665812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,768,0.015362133582433065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,512,0.0035946667194366455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,8192,0.028113067150115967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,512,0.015217066804567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,256,0.003186133255561193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,512,0.005694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,3072,0.012428800264994305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,256,0.014710399508476257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,128,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,128,0.014419200023015341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,2560,0.011310933033625285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,64,0.0027530667682488757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,64,0.014379733800888061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,768,32,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,768,32,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,65536,0.02919680078824361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,65536,0.058221864700317386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,1536,0.008248533308506011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,16384,0.013041067123413085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,16384,0.02617599964141846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,12288,0.010806399583816528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,1024,0.006938666601975759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,12288,0.029079467058181763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,12288,0.02239146629969279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,10240,0.009993599851926167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,10240,0.020806399981180827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,10240,0.024476800362269083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,768,0.006275199850400289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,8192,0.009468799829483033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,8192,0.019960532585779824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,8192,0.02127893368403117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,7168,0.008937600255012512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,7168,0.020167466004689535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,7168,0.01758613387743632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,6144,0.008262399832407634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,6144,0.01997119983037313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,5120,0.007441066702206929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,128,0.004857600231965383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,5120,0.01981333295504252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,4096,0.00724480003118515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,4096,0.01874026656150818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,3584,0.0069482664267222095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,3584,0.01111253301302592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,3584,0.018151466051737467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,3072,0.006534400085608165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,65536,0.13904852867126466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,3072,0.017890133460362754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,16384,0.038483198483784994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,2560,0.006333866715431213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,2560,0.01716586748758952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,2560,0.008926933010419209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,2048,0.005629866818586985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,2048,0.016563199957211814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,1536,0.004889599978923798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,1536,0.016049066185951234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,768,256,0.005231999854246775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,1024,0.004282666742801667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,6144,0.016008533040682473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,1024,0.015601066748301187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,1024,0.006247466802597046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,768,0.0037621334195137024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,5120,0.014017066359519959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,768,0.01535040040810903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,768,0.0057205334305763245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,512,0.0034613333642482757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,512,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,512,0.005494399865468343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,256,0.0032298666735490165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,256,0.014653866489728292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,128,0.0029738667110602063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,3072,0.009613866607348125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,128,0.004747733473777771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,128,0.014631467064221701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,64,0.0028266665836175283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,64,0.014219733079274497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,512,32,0.014545067151387533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,512,32,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,65536,0.026394667228062947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,2048,0.00786240001519521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,65536,0.054176000754038486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,16384,0.010011733571688334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,16384,0.02259946664174398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,16384,0.02640213370323181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,1536,0.007147733370463054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,12288,0.008430932958920796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,12288,0.021647999684015908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,12288,0.02111999988555908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,10240,0.007484800120194752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,10240,0.02121386726697286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,10240,0.018039466937383015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,8192,0.019768534104029338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,4096,0.01185706655184428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,8192,0.006794666747252147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,8192,0.015321600437164306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,7168,0.006557866434256236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,7168,0.019883733987808228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,6144,0.006381866832574208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,512,256,0.0049002667268117275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,6144,0.019485867023468016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,5120,0.0067221333583196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,5120,0.019811199108759562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,4096,0.006682666639486949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,4096,0.018528000513712565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,4096,0.010338133573532105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,3584,0.006621866424878438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,3584,0.018252799908320107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,3584,0.009918933113416035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,3072,0.006366933385531108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,3072,0.01738026738166809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,2560,0.006157866617043813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,2560,0.017182934284210204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,2560,0.008113066852092742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,2048,0.005504000186920166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,2048,0.016838399569193523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,2048,0.0071829333901405334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,1536,0.004935466746489207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,1536,0.016132266322771708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,1536,0.006763733426729838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,1024,0.004081066697835922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,1024,0.015453867117563882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,768,0.0037546666959921518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,768,0.015204266707102457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,6144,0.012462932864824932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,512,0.003542399903138479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,512,0.014858667055765787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,5120,0.0114847997824351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,512,0.005340800185998281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,256,0.003058133274316788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,256,0.014262400070826211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,256,0.004886400202910105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,128,0.0028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,128,0.014260266224543253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,65536,0.09797653357187906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,64,0.0027647999425729113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,64,0.014196266730626425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,256,32,0.0027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,3072,0.008475733796755473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,256,32,0.01434346636136373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,65536,0.01300373375415802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,65536,0.04717119932174683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,16384,0.007468800246715546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,16384,0.022382932901382446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,12288,0.006775466601053874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,12288,0.020558933417002358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,12288,0.021050665775934854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,10240,0.006704000135262807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,7168,0.014003200332323709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,10240,0.020358399550120036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,8192,0.0066890666882197065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,1024,0.0059125334024429325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,8192,0.019784533977508546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,8192,0.015497600038846334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,7168,0.006425599753856659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,768,0.005392000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,7168,0.020010666052500407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,6144,0.006159999966621399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,6144,0.019517866770426433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,5120,0.006571733454863231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,5120,0.01928960084915161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,4096,0.006086400151252747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,4096,0.01843520005544027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,65536,0.0967146635055542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,3584,0.00664106657107671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,3584,0.01771413286526998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,3584,0.009760000308354696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,10240,0.018273067474365235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,3072,0.006284800171852112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,3072,0.017037866512934367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,3072,0.008419199784596761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,2560,0.006145066519578298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,2560,0.016752000649770102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,2048,0.005406933526198069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,6144,0.012506666779518127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,2048,0.016532267133394875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,256,128,0.004710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,1536,0.004728533327579498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,5120,0.011373866597811382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,1536,0.006771199901898702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,1536,0.015800533692042033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,1024,0.004107733319203059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,1024,0.015425067146619162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,1024,0.005963733295599619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,768,0.003703466554482778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,768,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,4096,0.01002346674601237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,512,0.003319466610749563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,16384,0.026756266752878826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,512,0.014937600493431092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,512,0.00514026681582133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,256,0.003036800026893616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,256,0.01437440017859141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,256,0.004849066833655039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,128,0.0028384000062942503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,128,0.014777599771817525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,64,0.0026122666895389557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,7168,0.014036267002423605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,64,0.014317867159843446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,128,32,0.0026229334374268847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,65536,0.01325866679350535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,128,32,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,65536,0.04447893301645915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,16384,0.006698666512966156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,2560,0.008133333424727123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,12288,0.006380799909432728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,16384,0.023341866334279378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,12288,0.020572799444198608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,10240,0.006523733337720235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,10240,0.020038400093714395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,2048,0.0070154666900634766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,8192,0.006433066725730896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,8192,0.019403733809789023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,7168,0.00625493327776591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,7168,0.01949119965235392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,6144,0.006027733286221823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,6144,0.019356799125671387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,5120,0.006534400085608165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,5120,0.01907306710879008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,4096,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,4096,0.018091734250386557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,3584,0.00660693347454071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,3584,0.017990400393803917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,3072,0.006208000083764395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,3072,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,2560,0.006006399790445963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,2560,0.01694399913152059
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,2048,0.005331199864546458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,2048,0.016383999586105348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,128,0.004733866453170777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,1536,0.0046741331617037455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,1536,0.015825066963831583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,1024,0.0041685332854588825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,1024,0.015386666854222616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,768,0.0036533333361148832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,768,0.015052800377209982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,512,0.0032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,512,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,256,0.0030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,256,0.014817066987355552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,128,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,128,0.014134400089581809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,64,0.0026687999566396077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,160,128,768,0.00547626664241155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,64,0.014217600226402283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,16384,0.0222378671169281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,12288,0.006804266571998596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,64,32,0.0026335999369621276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,64,32,0.014363732933998109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,65536,0.009989333152770997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,65536,0.04253546794255574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,16384,0.006562133133411407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,12288,0.020207999149958293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,10240,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,8192,0.006251733501752217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,10240,0.020096000035603842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,8192,0.019978666305541994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,7168,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,7168,0.02062293291091919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,6144,0.005942399799823761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,6144,0.01904639999071757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,5120,0.0063296000162760425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,5120,0.01986666719118754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,4096,0.006062933305899302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,4096,0.018472532431284584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,2560,0.016752000649770102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,3584,0.006517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,3584,0.01769066651662191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,3072,0.006033066908518473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,3072,0.017297067244847617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,2560,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,2048,0.005420800050099691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,2048,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,1536,0.00470719983180364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,1536,0.01620266636212667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,1024,0.0040618665516376495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,1024,0.015853866934776306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,768,0.003606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,768,0.015063466628392539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,512,0.0034261333445707956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,512,0.01525759994983673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,256,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,256,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,128,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,128,0.014180266857147216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,64,0.0026464000344276427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,64,0.01430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,160,32,32,0.002674133330583572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,160,32,32,0.014390400052070618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,16384,0.541543451944987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,16384,0.29460051854451497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,12288,0.40576534271240233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,12288,0.22387199401855468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,10240,0.3410698572794596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,10240,0.2112650712331136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,8192,0.2745429356892904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,8192,0.15782079696655274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,8192,0.15035947163899738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,7168,0.2397984027862549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,7168,0.14144105911254884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,6144,0.20741012891133628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,6144,0.12408533096313476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,6144,0.1162933349609375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,5120,0.17507200241088866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,5120,0.10797653198242188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,4096,0.14287999471028645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,4096,0.09090773264567056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,3584,0.12487680117289227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,4096,0.08373653093973796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,3584,0.08224213123321533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,3584,0.07438399791717529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,16384,0.290067195892334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,3072,0.10904106299082439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,12288,0.2155893325805664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,3072,0.07407893339792887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,3072,0.06800959904988607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,2560,0.09317866961161295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,2560,0.06516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,2560,0.05657066504160563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,2048,0.08016746838887533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,2048,0.055929601192474365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,2048,0.049437868595123294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,1536,0.04617919921875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,1536,0.060038399696350095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,1536,0.040218667189280195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,1024,0.04341226816177368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,7168,0.1322719971338908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,1024,0.037922132015228274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,768,0.03396799961725871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,768,0.033701332410176595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,768,0.02898240089416504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,512,0.026742400725682576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,256,0.016740266482035318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,512,0.027778132756551104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,5120,0.09991573492685954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,512,0.021281067530314127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,256,0.022686932484308878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,128,0.010757333040237427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,128,0.02100693384806315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,128,0.016458666324615477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,64,0.008489599823951722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,64,0.01992959976196289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,65536,32,0.008470400174458822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,10240,0.18274240493774413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,65536,32,0.020803199211756388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,16384,0.13567253748575847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,16384,0.09477333227793375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,65536,0.321067746480306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,16384,0.07819093068440755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,65536,0.5573354721069336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,12288,0.10415253639221192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,12288,0.07109333674112955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,12288,0.06476159890492758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,10240,0.08583253224690755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,8192,0.07019093036651611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,10240,0.06683093706766764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,8192,0.05327359835306803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,7168,0.06146133343378703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,7168,0.04838293393452962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,6144,0.05349440177281698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,6144,0.04463893175125122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,256,0.017951999107996622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,5120,0.045665065447489425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,6144,0.034436265627543136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,5120,0.03980799913406372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,4096,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,4096,0.03488853375116984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,3584,0.033768534660339355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,3584,0.032942932844161985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,3584,0.023316266139348348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,65536,0.29320106506347654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,3072,0.029575467109680176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,65536,1024,0.030522666374842328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,3072,0.034169598420461016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,2560,0.026106667518615723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,2560,0.02926186720530192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,10240,0.05168106555938721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,2048,0.02184106707572937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,2048,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,2048,0.016657066345214844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,1536,0.01768853267033895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,1536,0.022547199328740438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,7168,0.03842133283615112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,1536,0.013938132921854654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,1024,0.012614400188128153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,1024,0.019845332702000937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,768,0.010222933689753215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,5120,0.030168533325195312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,768,0.01934400002161662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,768,0.010674132903416952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,512,0.008057599763075511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,512,0.018322134017944337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,4096,0.025231999158859254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,256,0.005846400062243144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,256,0.01644373337427775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,256,0.007310933371384938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,128,0.004545066754023234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,128,0.015897599856058757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,128,0.00693333347638448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,64,0.004138666639725367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,64,0.01596799989541372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,16384,32,0.0046304002404212955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,3072,0.02100906570752462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,16384,32,0.016150400042533875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,8192,0.043399465084075925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,16384,0.10133972962697346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,65536,0.4150037447611491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,65536,0.25095465977986653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,16384,0.07259306907653809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,16384,0.06654400030771891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,1024,0.011754666765530903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,12288,0.0813418706258138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,12288,0.05829439957936605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,10240,0.06640533208847046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,10240,0.05131306648254395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,10240,0.04100159804026286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,8192,0.0530623992284139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,8192,0.0449290672938029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,512,0.008762666583061218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,7168,0.047076265017191574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,7168,0.041229867935180665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,6144,0.041347201665242514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,6144,0.0375765323638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,5120,0.0356544017791748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,16384,2560,0.01892906626065572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,5120,0.03428586721420288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,4096,0.029402667284011842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,4096,0.030385067065556843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,3584,0.026088533798853557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,3584,0.028809599081675213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,3584,0.01908479928970337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,65536,0.23937813440958658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,3072,0.02325119972229004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,3072,0.01699413259824117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,3072,0.02728853424390157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,12288,0.04763306776682536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,2560,0.020329600572586058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,2560,0.024259199698766075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,2048,0.016927999258041383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,2048,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,2048,0.0132533331712087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,1536,0.013290666540463767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,1536,0.021180800596872964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,1536,0.011431466539700825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,7168,0.03087573250134786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,1024,0.009900800387064616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,1024,0.019296000401178993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,6144,0.027541333436965944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,768,0.00842133363087972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,768,0.018172800540924072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,5120,0.023988266785939537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,768,0.008575999736785888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,512,0.016023466984430947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,512,0.006969599922498067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,256,0.004565333326657614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,256,0.015946666399637856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,256,0.00628053347269694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,128,0.004009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,128,0.015338666240374246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,128,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,64,0.0036469332873821257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,64,0.015748266379038492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,12288,32,0.0040522667268912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,12288,32,0.01548373301823934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,8192,0.0343178669611613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,2560,0.015131733814875283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,16384,0.08804053465525309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,65536,0.35790185928344725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,16384,0.06589013338088989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,65536,0.22430720329284667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,65536,0.20777386029561362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,16384,0.053887999057769774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,12288,0.07343680063883463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,12288,0.05176000197728475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,10240,0.06165759960810343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,1024,0.009640533725420635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,10240,0.04654719829559326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,4096,0.020590933163960774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,8192,0.047096534570058184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,8192,0.04059946537017822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,7168,0.040830934047698976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,7168,0.0373525341351827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,12288,512,0.007055999835332234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,6144,0.03535573482513428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,6144,0.03476586739222209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,5120,0.03050559957822164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,5120,0.030808534224828082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,4096,0.02574613293011983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,4096,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,4096,0.018045866489410402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,3584,0.023201066255569457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,3584,0.02564479907353719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,12288,0.0443125327428182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,3072,0.020500266551971437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,10240,0.03634133338928223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,3072,0.023843199014663696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,8192,0.030158933003743487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,2560,0.017338667313257852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,7168,0.027030400435129803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,2560,0.022488532463709514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,2048,0.014340266585350037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,2048,0.021154133478800456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,2048,0.011818666259447734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,1536,0.011222400267918905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,1536,0.019694934288660683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,1024,0.00846506655216217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,1024,0.018322134017944337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,3584,0.016432000199953715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,768,0.007292800148328145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,768,0.016341333587964378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,512,0.006178133189678192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,512,0.015987199544906617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,3072,0.014828800161679586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,6144,0.02455679972966512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,256,0.004137599964936575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,5120,0.021432532866795858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,256,0.015596800049146018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,2560,0.013460266590118408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,128,0.0037845333417256674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,128,0.015316266814867655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,64,0.0037216000258922578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,64,0.015398400028546652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,10240,32,0.0038730666041374207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,1536,0.01016213297843933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,10240,32,0.015422933300336204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,65536,0.18884480794270833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,65536,0.28266986211140954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,1024,0.008841600020726521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,16384,0.0703765312830607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,16384,0.05970986684163412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,12288,0.05342613458633423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,768,0.007216000060240428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,12288,0.04578346808751424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,10240,0.045748265584309895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,10240,0.04089279969533284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,512,0.006378666559855144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,8192,0.03840959866841634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,8192,0.03627413511276245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,7168,0.03281280001004537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,256,0.0058229332168896995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,7168,0.03312000036239624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,10240,128,0.005644799768924713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,6144,0.028823467095692952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,6144,0.030470399061838786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,5120,0.025050665934880572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,5120,0.028010666370391846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,16384,0.0463648001352946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,4096,0.020937599738438926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,12288,0.03702720006306966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,4096,0.024842667579650878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,10240,0.03043946623802185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,3584,0.018540799617767334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,3584,0.023923200368881226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,3072,0.016476800044377647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,3072,0.022205867369969687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,7168,0.023286400238672893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,2560,0.014433067043622336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,6144,0.020887466271718343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,2560,0.02145706613858541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,2048,0.011879466970761617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,2048,0.020359466473261513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,5120,0.01867199937502543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,1536,0.009771733482678732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,1536,0.018582399686177573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,65536,0.18675626118977864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,1024,0.008213333288828532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,1024,0.017513600985209148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,8192,0.026230400800704955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,768,0.006939733525117238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,768,0.01688533425331116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,4096,0.0160480002562205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,512,0.005128533144791921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,512,0.015624533096949259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,3072,0.0132341335217158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,256,0.004036266605059306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,256,0.01530026694138845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,3584,0.014713600277900696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,2048,0.010945066809654236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,128,0.0036703998843828833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,128,0.014840533336003622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,64,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,64,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,8192,32,0.003664000084002813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,2560,0.012174933155377706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,8192,32,0.015459199746449789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,65536,0.2624704043070475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,65536,0.17195520401000977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,16384,0.06895893414815267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,1536,0.010005333026250203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,16384,0.05802559852600098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,768,0.00694400022427241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,12288,0.05420693159103394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,12288,0.0439850648244222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,1024,0.007880533238252004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,512,0.006427733103434245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,10240,0.04194986820220947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,256,0.006019199887911478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,10240,0.039843201637268066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,8192,0.03401279846827189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,8192,0.03442879915237427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,8192,128,0.0057322666049003605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,7168,0.030462932586669923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,7168,0.0315285325050354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,6144,0.026680533091227216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,6144,0.029318400224049884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,5120,0.023436800638834635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,5120,0.025763199726740522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,65536,0.1655786673227946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,4096,0.020963199933369956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,16384,0.04428799947102864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,12288,0.03401386737823486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,4096,0.023540266354878745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,3584,0.017409066359202065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,3584,0.02266133427619934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,3072,0.01546346644560496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,3072,0.02130026618639628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,10240,0.02988160053888957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,2560,0.012498133381207784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,2560,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,6144,0.020141865809758505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,7168,0.022627200682957968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,2048,0.010609066486358643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,8192,0.025049599011739095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,2048,0.01947306593259176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,1536,0.00881173312664032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,1536,0.018580265839894614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,1024,0.007066666583220164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,1024,0.01616106629371643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,5120,0.017790933450063072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,768,0.00631039987007777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,768,0.016194132963816325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,3072,0.012379733721415202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,4096,0.015171200037002563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,512,0.00436160018046697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,512,0.015571199854214988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,3584,0.013658666610717773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,256,0.0038133333126703895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,256,0.015268266201019287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,128,0.0034474665919939675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,128,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,1536,0.009238400061925252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,2560,0.01164479951063792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,64,0.00325546662012736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,64,0.014998400211334228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,7168,32,0.0033312000334262846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,7168,32,0.015267200271288552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,2048,0.010281599561373393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,65536,0.21413013140360512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,65536,0.15016533533732096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,1024,0.0070933332045873005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,16384,0.06495253245035806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,16384,0.047430400053660074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,12288,0.04165013233820598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,12288,0.03921066522598267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,10240,0.03565333286921184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,768,0.006563200056552887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,10240,0.03496426741282145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,8192,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,8192,0.031946667035420734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,256,0.005602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,512,0.0059445331494013464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,7168,0.0255786657333374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,7168,128,0.005339733262856802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,7168,0.0287989338239034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,6144,0.022657066583633423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,6144,0.026268800099690754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,5120,0.01972800095876058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,5120,0.024346667528152465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,12288,0.03179840048154195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,4096,0.016748799880345663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,65536,0.1613653341929118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,16384,0.04085973501205444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,4096,0.02249493400255839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,3584,0.014756266276041666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,3584,0.02165013353029887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,3072,0.013012267152468362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,3072,0.020747733116149903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,10240,0.02773653268814087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,2560,0.011453866958618164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,2560,0.01986879905064901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,8192,0.023427200317382813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,2048,0.00943893293539683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,2048,0.019035732746124266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,6144,0.01881600022315979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,1536,0.007968000074227651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,1536,0.01803306738535563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,7168,0.02110613385836283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,1024,0.0065194666385650635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,1024,0.01614293356736501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,5120,0.01648853321870168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,768,0.005560533205668131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,768,0.016184533635775246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,4096,0.014310399691263834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,3072,0.011573333541552227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,512,0.0042133331298828125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,512,0.015408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,3584,0.012888532876968384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,256,0.003589333345492681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,256,0.014846932888031007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,128,0.0033151999115943907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,2560,0.011026133100191753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,1536,0.008504533767700195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,128,0.014475733041763306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,64,0.0032074667513370516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,64,0.0149536003669103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,6144,32,0.003421866645415624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,2048,0.00978773335615794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,6144,32,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,65536,0.1859231948852539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,65536,0.13188800017038982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,16384,0.05696959892908732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,16384,0.048527999718983965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,12288,0.041220267613728837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,12288,0.03678826491038005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,1024,0.0066997334361076355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,10240,0.036152533690134686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,10240,0.032857600847880045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,768,0.006203733384609222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,256,0.005322666466236114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,512,0.005723733206590017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,8192,0.025821866591771443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,6144,128,0.005129600067933401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,8192,0.02980159918467204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,7168,0.02314773400624593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,7168,0.027508266766866046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,6144,0.020584533611933388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,6144,0.025054933627446492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,5120,0.017989333470662436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,5120,0.023461333910624185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,12288,0.029725867509841918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,65536,0.14352107048034668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,16384,0.03831040064493815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,4096,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,4096,0.021553067366282146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,3584,0.013326932986577352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,3584,0.02073600093523661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,3072,0.01163093348344167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,3072,0.020221867163976035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,10240,0.025862399737040204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,2560,0.009910399715105694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,2560,0.01920959949493408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,6144,0.017537067333857216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,2048,0.00862506628036499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,2048,0.018433066209157307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,7168,0.020081067085266115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,8192,0.022106667359670006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,1536,0.007249066730340321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,1536,0.016594133774439492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,1024,0.006060799956321717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,1024,0.015386666854222616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,5120,0.015863466262817382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,768,0.0046079998215039575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,768,0.0156960000594457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,4096,0.013593600193659464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,3072,0.01130986710389455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,512,0.003995733211437861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,512,0.015171200037002563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,3584,0.012453333536783854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,256,0.003550933301448822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,256,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,128,0.0032650666932264962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,128,0.014484266440073649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,2560,0.010412800312042236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,64,0.003139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,64,0.01455893317858378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,5120,32,0.0031626666585604347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,5120,32,0.014621866742769876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,1536,0.007925333579381307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,65536,0.14410026868184406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,2048,0.009452799956003826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,65536,0.11372693379720052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,16384,0.04141120115915935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,16384,0.041621331373850504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,12288,0.03306879997253418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,1024,0.006629333396752675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,12288,0.03391679922739665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,768,0.0061482667922973635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,10240,0.029191466172536214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,10240,0.03104426662127177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,8192,0.026866134007771807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,8192,0.028331732749938963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,256,0.0052906667192777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,512,0.005658666789531708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,7168,0.021203200022379555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,5120,128,0.005137066543102265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,7168,0.02563626567522685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,6144,0.019049600760142008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,6144,0.02416426738103231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,5120,0.016090666254361473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,65536,0.12534293333689372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,16384,0.03221759994824727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,5120,0.022716800371805825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,4096,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,4096,0.020932267109553017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,3584,0.012429866194725036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,12288,0.024961066246032716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,3584,0.020497065782546998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,10240,0.021824000279108684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,3072,0.01069760024547577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,3072,0.019497599204381308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,2560,0.009547733267148336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,2560,0.01898026665051778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,8192,0.019004799922307334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,2048,0.008292266726493835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,7168,0.01672640045483907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,2048,0.018541866540908815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,6144,0.01514240006605784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,1536,0.007411199808120728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,1536,0.016771199305852254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,1024,0.00553706685702006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,1024,0.015936000148455302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,5120,0.013660800457000733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,768,0.004732800026734671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,768,0.016042666633923848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,4096,0.012609066565831504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,512,0.004129066566626231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,512,0.015553067127863566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,3072,0.010502400000890096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,256,0.0037216000258922578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,256,0.014986667037010192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,3584,0.011517866452534994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,128,0.003356799980004629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,2048,0.00860693355401357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,128,0.014551466703414917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,64,0.0031456001102924346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,1024,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,2560,0.009823999802271525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,64,0.014865066607793173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,4096,32,0.0033845332761605583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,4096,32,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,65536,0.12838186422983805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,65536,0.10619839827219646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,1536,0.00751146674156189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,16384,0.03997013171513875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,16384,0.04059520165125529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,12288,0.031905066967010495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,12288,0.03176213304201762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,10240,0.027553067604700728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,10240,0.028536534309387206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,768,0.006033066908518473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,8192,0.019845332702000937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,512,0.0055071999629338585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,8192,0.025900799036026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,256,0.005366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,7168,0.01773759921391805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,4096,128,0.005020800232887268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,7168,0.02397226691246033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,6144,0.015851733088493348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,6144,0.02297280033429464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,5120,0.0136543999115626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,5120,0.021811199188232423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,65536,0.11496213277180989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,16384,0.03147626717885335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,4096,0.012408533692359924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,4096,0.02076693375905355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,12288,0.02448426683743795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,3584,0.01032533347606659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,3584,0.0202890674273173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,10240,0.02118720014890035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,3072,0.009427199761072796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,3072,0.019406932592391967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,2560,0.008711466193199157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,2560,0.018722132841746012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,8192,0.01783999999364217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,7168,0.016660267114639284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,2048,0.007875200112660725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,2048,0.01730453372001648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,6144,0.015003732840220132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,1536,0.0067071999112765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,1536,0.016369066635767617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,1024,0.005187200009822845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,1024,0.01578133304913839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,4096,0.01242026686668396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,768,0.004745600124200186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,768,0.015372799833615622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,3072,0.010413866241772969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,512,0.004164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,3584,0.011426132917404175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,512,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,256,0.003499733408292135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,2560,0.00958079993724823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,256,0.01511573294798533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,128,0.003218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,128,0.014641066392262777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,5120,0.013591466347376504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,64,0.0031829332311948144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,2048,0.007948799928029378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3584,32,0.003142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,64,0.01480959951877594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3584,32,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,1024,0.006141866743564606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,65536,0.11546773115793865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,1536,0.007073066631952922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,65536,0.09840213457743327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,16384,0.03418986797332764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,16384,0.03758080005645752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,12288,0.026706133286158246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,12288,0.029981867472330732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,768,0.005784533421198527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,10240,0.023613866170247397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,512,0.005629866818586985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,10240,0.02733333309491475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,256,0.005276800195376078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,8192,0.020811732610066733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3584,128,0.004972800115744273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,8192,0.02540160020192464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,7168,0.016822399695714314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,7168,0.023512534300486245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,6144,0.014800000190734863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,6144,0.02256960074106852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,5120,0.01270080010096232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,5120,0.02128960092862447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,65536,0.1241930643717448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,12288,0.026099199056625368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,16384,0.033846398194630936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,4096,0.011707733074824016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,4096,0.02027413249015808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,3584,0.009975467125574749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,3584,0.019475199778874717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,3072,0.009046399593353271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,3072,0.0192522664864858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,10240,0.02284160057703654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,2560,0.008216533561547596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,2560,0.01717546582221985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,8192,0.018963199853897095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,7168,0.017332265774408974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,2048,0.007420800129572551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,2048,0.017107200622558594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,1536,0.006409599880377452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,1536,0.0162581334511439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,6144,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,1024,0.004741333425045013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,5120,0.014223999778429666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,1024,0.015830399592717488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,768,0.004301866888999939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,768,0.016059733430544534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,4096,0.012482133507728577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,512,0.003748266647259394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,3072,0.010082133611043294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,512,0.015244799852371215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,3584,0.011389866471290588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,256,0.0033151999115943907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,256,0.014948266744613647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,128,0.003124266614516576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,128,0.014492799838383993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,2560,0.009103999535242716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,64,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,1536,0.006853333115577698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,64,0.014922666549682616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,3072,32,0.0030901332696278887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,3072,32,0.014727466305096946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,2048,0.007931733131408691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,65536,0.09746560255686441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,65536,0.08947520256042481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,16384,0.03079040050506592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,16384,0.034968535105387374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,768,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,1024,0.006300800045331319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,12288,0.024650667111078897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,12288,0.02955840031305949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,10240,0.021590399742126464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,10240,0.026841600735982258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,8192,0.018259199460347493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,8192,0.024652800957361855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,512,0.0055754666527112326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,256,0.005125333368778229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,7168,0.01647040049235026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,7168,0.02288533250490824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,6144,0.013706666231155396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,3072,128,0.004926933348178864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,6144,0.02198293407758077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,5120,0.012049067020416259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,5120,0.021178666750590006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,12288,0.025018666187922162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,65536,0.12067093054453533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,4096,0.01037440001964569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,16384,0.033073065678278606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,4096,0.019916800657908122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,3584,0.009715200463930766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,3584,0.019668267170588175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,3072,0.008916266759236654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,10240,0.02221333384513855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,3072,0.018272000551223754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,2560,0.008012799918651581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,2560,0.017485866943995156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,8192,0.018598399559656777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,2048,0.007177599767843883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,2048,0.017248000701268515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,6144,0.01527466674645742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,7168,0.016909867525100708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,1536,0.005814399818579356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,1536,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,5120,0.013821867108345032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,1024,0.004950400193532308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,1024,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,4096,0.012257066369056702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,768,0.0043029333154360454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,3072,0.00959999958674113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,768,0.015185067057609558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,512,0.0038101332883040107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,3584,0.011052800218264262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,512,0.015264000495274863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,256,0.003339733431736628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,256,0.014473600188891092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,2560,0.008646399776140849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,128,0.0030762667457262674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,128,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,64,0.0029781334102153777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,2048,0.007795199751853943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,64,0.014665599664052328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2560,32,0.003020799905061722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2560,32,0.014651733636856078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,65536,0.07927040259043375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,65536,0.07758613427480063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,1536,0.00695253312587738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,16384,0.0245685338973999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,16384,0.03144320050875346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,12288,0.019768534104029338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,12288,0.027588266134262084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,1024,0.006293333570162455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,10240,0.017794134219487508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,10240,0.02495253284772237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,768,0.005838933090368906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,512,0.005636266867319743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,8192,0.016800000270207723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,256,0.00517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,8192,0.024089600642522177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2560,128,0.004926933348178864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,7168,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,7168,0.023693867524464927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,6144,0.012728533148765564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,6144,0.021797333161036173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,5120,0.011897599697113037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,5120,0.021104000012079873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,65536,0.10486826896667481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,4096,0.010179199775060018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,16384,0.02818133234977722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,4096,0.019947733481725058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,3584,0.009194667140642803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,3584,0.019056000312169395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,12288,0.022065067291259767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,3072,0.008430932958920796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,3072,0.018042665719985963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,10240,0.019292799631754558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,2560,0.007716266810894013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,2560,0.017761067549387614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,8192,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,7168,0.015148799618085226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,2048,0.006727466483910878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,2048,0.016484266519546507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,6144,0.013915733496348063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,1536,0.00556160012880961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,1536,0.016055466731389363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,1024,0.004533333579699198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,5120,0.012584533294041952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,1024,0.01552959978580475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,768,0.004193066557248434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,768,0.015465600291887918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,4096,0.010575999816258747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,512,0.0036981334288915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,512,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,3584,0.009947733084360758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,3072,0.008699733018875121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,256,0.0033642667035261786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,256,0.014586666226387024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,128,0.002977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,128,0.014345600207646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,2560,0.008224000036716462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,64,0.002820266783237457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,64,0.01443839967250824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,2048,0.007299200197060903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,2048,32,0.002811733384927114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,2048,32,0.014430933197339377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,1024,0.005957333246866862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,65536,0.060388267040252686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,1536,0.00682773341735204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,65536,0.06846186319986979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,16384,0.019916800657908122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,768,0.005669333537419637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,16384,0.027454932530721027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,12288,0.015620266397794088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,12288,0.024845866362253825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,10240,0.013680000106493631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,10240,0.024199465910593666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,8192,0.012660266955693564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,8192,0.02330346703529358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,512,0.005336533486843109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,256,0.0051242664456367494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,7168,0.011716266473134358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,7168,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,2048,128,0.004915200173854828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,6144,0.010782933235168457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,6144,0.021617066860198975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,65536,0.09711360136667888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,5120,0.010003200173377991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,16384,0.027538132667541505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,12288,0.021434666713078816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,5120,0.01926506757736206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,4096,0.009634133179982502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,10240,0.018939733505249023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,4096,0.018901334206263224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,3584,0.008988799651463826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,3584,0.018402133385340372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,3072,0.008065066734949748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,3072,0.017514665921529136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,8192,0.016025599837303162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,2560,0.007122133175532024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,2560,0.01714986761411031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,7168,0.01469546655813853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,2048,0.005827199916044871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,2048,0.016860800981521606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,1536,0.00510506679614385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,1536,0.016382933656374613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,6144,0.013657599687576294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,1024,0.004260266820589701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,1024,0.015640532970428465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,5120,0.011845333377520244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,768,0.003984000037113826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,768,0.015250133474667868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,4096,0.01034773290157318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,512,0.003656533360481262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,3072,0.008516266942024231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,512,0.01502826710542043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,256,0.0032874666154384612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,256,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,3584,0.00958079993724823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,128,0.0029077333708604175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,2048,0.007136000196139018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,128,0.01446613371372223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,2560,0.008266666531562805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,64,0.002872533351182938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1536,32,0.002885333448648453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,64,0.014546133081118264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1536,32,0.014614400267601014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,1536,0.0069578667481740315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,65536,0.04404266675313313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,1024,0.005824000140031179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,65536,0.05966613292694092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,16384,0.015719466408093772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,16384,0.024939733743667602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,16384,0.027065600951512652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,12288,0.012389333049456278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,12288,0.02209920088450114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,10240,0.011397332946459452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,10240,0.02142613331476847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,10240,0.01877440015474955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,8192,0.010214400291442872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,8192,0.02104533314704895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,7168,0.009656533598899841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,7168,0.020610133806864418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,65536,0.09859306812286377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,512,0.00517546683549881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,768,0.005478399991989136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,6144,0.008995200196901958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,256,0.004951466619968414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,6144,0.019267199436823527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,6144,0.012723199526468911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1536,128,0.004749866823355356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,5120,0.008422399560610454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,12288,0.02142080068588257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,5120,0.019568000237147012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,5120,0.011293866237004598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,4096,0.007635200023651123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,4096,0.01847040057182312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,8192,0.015708800156911215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,3072,0.006916266679763794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,3584,0.0074890668193499255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,3584,0.018194133043289186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,3072,0.017723733186721803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,3072,0.008504533767700195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,7168,0.013924266894658408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,2560,0.006404266754786174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,2048,0.016387200355529784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,2560,0.01718613306681315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,2048,0.005760000149408976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,1536,0.004983466863632202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,1536,0.015873066584269204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,1024,0.004220800101757049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,1024,0.015430399775505066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,768,0.003912533322970072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,768,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,512,0.0035434665779272715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,4096,0.010057600339253743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,512,0.014804266889890037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,256,0.003236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,3584,0.009531733393669129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,256,0.014588800072669984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,128,0.003035733352104823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,128,0.014252799749374389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,2560,0.008062933385372163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,2048,0.0071487997968991595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,64,0.002699733277161916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,1536,0.006853333115577698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,64,0.014517333110173544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,1024,32,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,1024,32,0.014521599809328715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,1024,0.005764266848564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,65536,0.03673280080159505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,65536,0.05584853490193685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,65536,0.09565119743347168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,16384,0.01260693371295929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,16384,0.024202666680018105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,768,0.0054666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,12288,0.010525866349538168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,12288,0.02165013353029887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,512,0.005141333242257436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,10240,0.009667199850082398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,10240,0.02182933290799459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,256,0.004870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,8192,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,8192,0.02023680011431376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,8192,0.015064533551534018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,7168,0.008354133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,7168,0.019796266158421835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,6144,0.00793280005455017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,6144,0.019036799669265747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,6144,0.012583466370900473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,1024,128,0.0046741331617037455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,5120,0.008551466464996337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,5120,0.019437867403030395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,4096,0.007578666508197785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,4096,0.018579200903574625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,3584,0.007106133302052816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,3584,0.01779200037320455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,3584,0.009525332848230999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,3072,0.006725333134333293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,3072,0.017518933614095053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,3072,0.008307200173536937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,16384,0.026989867289861042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,2560,0.006389333307743073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,2560,0.01702079971631368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,12288,0.02109760046005249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,2560,0.008021333316961924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,10240,0.01812373399734497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,2048,0.005551999807357788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,2048,0.016383999586105348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,2048,0.007021866738796234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,1536,0.0049781332413355505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,1536,0.01562666694323222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,7168,0.013702399532000222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,1024,0.004206933577855428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,1024,0.015447466572125753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,768,0.0038602667550245917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,768,0.015014400084813436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,5120,0.011330133676528931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,512,0.003472000112136205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,512,0.015282133221626281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,512,0.005207466582457224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,256,0.0030805334448814393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,256,0.01436906655629476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,4096,0.009912533561388652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,128,0.003031466652949651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,128,0.014405333002408347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,64,0.0027402666707833606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,64,0.014396799604098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,768,32,0.0027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,768,32,0.014358400305112203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,65536,0.027394133806228637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,65536,0.051229866345723477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,16384,0.010471466183662414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,1536,0.006713599960009258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,16384,0.023383466402689616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,16384,0.026470400889714557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,12288,0.009176533420880635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,1024,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,12288,0.020413867632548013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,12288,0.020553600788116456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,10240,0.008610133330027263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,768,0.00544106662273407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,10240,0.020539732774098714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,8192,0.019748266537984213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,10240,0.017820799350738527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,8192,0.00909546713034312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,8192,0.014944000045458474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,7168,0.008312533299128216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,256,0.004862933357556661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,6144,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,768,128,0.004660266637802124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,7168,0.020001065731048585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,6144,0.007560533285140991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,6144,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,5120,0.006846933563550313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,5120,0.019400533040364584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,5120,0.011147733529408772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,4096,0.00636053333679835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,4096,0.01835306684176127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,3584,0.009356799721717834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,3584,0.006842666864395141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,3584,0.01771519978841146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,65536,0.09350506464640299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,3072,0.006444799900054932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,3072,0.008311466872692108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,3072,0.017388800779978432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,2560,0.006331733365853627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,2560,0.01713706652323405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,2048,0.005622399846712748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,2048,0.01665066679318746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,2048,0.007088000078996022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,1536,0.004752000172932943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,1536,0.015689599514007568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,1024,0.004221866528193155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,1024,0.015554133057594299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,7168,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,768,0.004003199934959412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,768,0.015432533621788026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,512,0.0035114665826161706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,512,0.014999467134475707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,256,0.0031690667072931922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,256,0.014398933450380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,4096,0.010012800494829815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,128,0.00290133332212766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,128,0.01430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,2560,0.008052266637484233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,64,0.0027200000981489818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,64,0.014265599846839904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,512,32,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,512,32,0.014470400412877402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,65536,0.018641066551208497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,65536,0.0440287987391154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,65536,0.09174719651540121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,16384,0.009287466605504353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,1024,0.005734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,16384,0.021847466627756752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,12288,0.007761066655317943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,768,0.005297066768010458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,12288,0.020348799228668214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,10240,0.007031466563542683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,512,0.00505920002857844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,10240,0.02000853419303894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,8192,0.006572799881299336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,8192,0.019976532459259032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,256,0.004880000154177348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,7168,0.006493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,7168,0.01938026746114095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,6144,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,128,0.0046165332198143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,6144,0.01893226703008016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,6144,0.012429866194725036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,5120,0.0065610667069753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,5120,0.019011199474334717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,5120,0.011249066392580668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,4096,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,512,1536,0.006751999755700429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,4096,0.018428800503412883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,3584,0.006724266707897187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,3584,0.018028799692789713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,3584,0.009365333120028178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,3072,0.0063178668419520065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,3072,0.008317866424719492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,3072,0.017458132902781167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,16384,0.025176533063252765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,2560,0.006101333101590474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,12288,0.019977599382400513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,2560,0.017017600933710735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,2560,0.008001066744327545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,2048,0.005468800167242686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,10240,0.017428267002105712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,2048,0.01634880006313324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,8192,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,1536,0.004810666541258494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,1536,0.015953066945075988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,1536,0.006664533416430156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,1024,0.004116266717513402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,1024,0.01546346644560496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,768,0.003786666691303253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,768,0.01535360018412272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,768,0.0053045332431793215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,512,0.0034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,512,0.01507306694984436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,4096,0.009942400455474853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,256,0.003028266628583272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,256,0.014427733421325684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,128,0.002996266633272171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,128,0.01425386667251587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,64,0.002717866748571396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,64,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,256,32,0.0026528000831604003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,256,32,0.014244266351064048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,65536,0.012516267100969949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,65536,0.03850346803665161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,16384,0.0068351998925209045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,65536,0.09249173005421957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,7168,0.013684266805648803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,16384,0.021015467246373494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,16384,0.025587199131647746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,12288,0.006493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,12288,0.019478400548299156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,1024,0.005721599857012431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,10240,0.00660159985224406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,10240,0.020413867632548013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,8192,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,8192,0.018913066387176512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,512,0.004981333514054617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,7168,0.00628053347269694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,7168,0.019406932592391967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,256,0.004769066472848257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,7168,0.01356160044670105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,128,0.004612266520659129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,6144,0.006133333345254262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,6144,0.01875200072924296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,5120,0.006611200173695882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,5120,0.018862932920455933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,256,2048,0.007190399865309398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,5120,0.011238400141398113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,4096,0.006080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,4096,0.0180021325747172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,4096,0.010083199540774027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,3584,0.006635733445485433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,3584,0.017883733908335368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,3072,0.00622506688038508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,3072,0.017428267002105712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,12288,0.020216532548268638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,2560,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,2560,0.016769067446390788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,10240,0.01766293247540792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,2560,0.007956266899903615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,2048,0.005492266515890757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,2048,0.016272000471750894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,1536,0.004699733356634776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,1536,0.015492266416549683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,8192,0.014636799693107605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,1024,0.00411520004272461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,1024,0.01554026703039805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,768,0.003673599908749262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,768,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,6144,0.012380799651145935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,768,0.005425066749254862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,512,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,512,0.014663466811180114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,512,0.005050666630268097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,256,0.0030378667016824085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,256,0.014493866761525472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,128,0.0028789333999156954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,128,0.014266666769981385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,64,0.002738133321205775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,3584,0.009305600325266521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,64,0.014184533556302389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,128,32,0.0026410666604836782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,128,32,0.014235732952753702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,3072,0.008141866823037466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,65536,0.010044800241788228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,65536,0.03668373425801595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,16384,0.006252799928188324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,16384,0.02132586638132731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,12288,0.006230400005976359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,10240,0.006448000172773997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,12288,0.019679999351501463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,10240,0.019723733266194664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,8192,0.0063498665889104204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,1536,0.006714666883150737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,8192,0.019195733467737834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,7168,0.006183466811974844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,7168,0.019074134031931558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,6144,0.006035199761390686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,6144,0.018556799491246542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,5120,0.0064074665307998655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,1024,0.005799466868241628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,4096,0.017990400393803917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,5120,0.018902399142583213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,4096,0.006021333237489065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,3584,0.006431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,3584,0.017310933272043864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,3072,0.006110933423042297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,3072,0.017214934031168617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,2560,0.0060127998391787205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,256,0.004867200056711833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,2560,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,1536,0.015955199797948204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,2048,0.005345066885153452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,2048,0.016378666957219443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,1536,0.004710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,512,0.003356799980004629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,1024,0.003947733342647553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,512,0.01493013302485148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,1024,0.015082666277885437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,256,0.014615466197331747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,2048,0.007049599786599477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,768,0.0035946667194366455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,768,0.015025066335995993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,256,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,128,0.0028629332780838014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,128,0.014152533809343972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,64,0.0026261332134405774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,64,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,64,32,0.0026346666117509207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,16384,0.02073813279469808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,64,32,0.014082133769989014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,65536,0.008701866865158081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,65536,0.03645333449045817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,16384,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,12288,0.006186666587988535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,12288,0.01993066668510437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,10240,0.006278400123119354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,10240,0.01967680056889852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,8192,0.006171733140945435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,128,128,128,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,6144,0.018399999539057414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,8192,0.018908800681432088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,7168,0.00619946668545405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,7168,0.019350399573644005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,6144,0.005941333373387655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,5120,0.006289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,3584,0.01743040084838867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,5120,0.018820265928904213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,4096,0.005895466605822245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,4096,0.018228266636530557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,3584,0.006504533191521962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,3072,0.006016000111897787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,3072,0.017080533504486083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,2560,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,2560,0.017401599884033205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,2048,0.005314133564631144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,2048,0.016179200013478598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,1536,0.0047882666190465295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,1536,0.01564586659272512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,1024,0.004037333279848098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,1024,0.015093333522478738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,768,0.003740799923737844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,768,0.015366400281588236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,512,0.003337600082159042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,512,0.015245866775512696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,256,0.0030271999537944795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,256,0.014317867159843446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,128,0.0027104000250498454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,64,0.0026229334374268847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,128,0.014326399564743042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,64,0.014308266838391624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,128,32,32,0.0027104000250498454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,128,32,32,0.01393066644668579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,16384,0.283733336130778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,16384,0.5266079902648926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,16384,0.30122559865315757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,12288,0.2177034695943197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,10240,0.33303041458129884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,12288,0.3959850629170736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,10240,0.1850048065185547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,10240,0.1933194637298584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,8192,0.2755008061726888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,8192,0.1519221305847168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,8192,0.1522304058074951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,7168,0.23355520566304527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,7168,0.14171306292215985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,6144,0.2015381336212158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,6144,0.12200853029886882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,6144,0.11576639811197917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,5120,0.17093653678894044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,5120,0.10403520266215008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,5120,0.09833920001983643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,4096,0.1387285391489665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,4096,0.08911253611246744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,3584,0.12094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,3584,0.07846186955769857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,3072,0.10523413022359211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,3072,0.07199467023213704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,3072,0.06642453273137411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,2560,0.08966293334960937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,2560,0.061197865009307864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,2048,0.07362026373545329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,2048,0.05351786613464356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,1536,0.05712533394495646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,1536,0.04405759970347087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,1024,0.04067200024922689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,1536,0.03947519858678182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,1024,0.03592746655146281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,1024,0.029623466730117797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,768,0.031973334153493245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,768,0.03166399995485942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,768,0.02794133424758911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,7168,0.13475626309712727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,512,0.023545600970586143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,512,0.02586666742960612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,256,0.013733333349227906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,256,0.021637332439422608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,128,0.009332266449928284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,4096,0.07987093130747477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,128,0.01967573364575704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,128,0.015499732891718545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,3584,0.07249066829681397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,64,0.007618133227030437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,64,0.018922666708628334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,65536,32,0.006724266707897187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,65536,32,0.01912533243497213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,2560,0.055506134033203126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,12288,0.2222229321797689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,2048,0.04817493359247844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,16384,0.13200106620788574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,16384,0.08517866929372152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,65536,0.3060277303059896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,16384,0.0809216022491455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,65536,0.586733881632487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,12288,0.10085972944895427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,12288,0.06727360089619955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,12288,0.06365866661071777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,10240,0.09274240334828696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,10240,0.06024106740951538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,8192,0.06737279891967773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,512,0.021182932456334434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,8192,0.05154560009638468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,8192,0.04473280111948649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,7168,0.059229866663614905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,7168,0.0466261347134908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,65536,256,0.017416532834370932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,7168,0.03884906768798828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,6144,0.053191467126210534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,6144,0.033580799897511796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,6144,0.04238293170928955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,5120,0.04427093267440796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,5120,0.03802880048751831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,5120,0.030419200658798218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,4096,0.03671679894129436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,3584,0.03256640036900838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,4096,0.033896533648173015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,3584,0.03203733364741008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,3072,0.02853333353996277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,3072,0.02951893409093221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,2560,0.02467733422915141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,2560,0.027139200766881304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,65536,0.30394986470540364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,2048,0.02064746618270874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,2048,0.016345600287119545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,2048,0.025385600328445435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,1536,0.016742400328318276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,1536,0.021913599967956544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,1024,0.011907200018564861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,1024,0.02015786568323771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,1024,0.011181867122650147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,768,0.00965013305346171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,768,0.01861226757367452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,512,0.007706666489442189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,512,0.016457600394884746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,4096,0.024945066372553507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,256,0.005165866514046987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,3584,0.023105067014694215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,256,0.016084266702334087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,3072,0.020965333779652914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,128,0.0042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,128,0.015382400155067444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,10240,0.05191466808319092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,128,0.006839466591676076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,2560,0.01948480010032654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,64,0.003928533444801966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,16384,32,0.004092800120512644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,64,0.01555519998073578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,16384,32,0.015910399953524272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,1536,0.013576533397038779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,16384,0.10979413191477458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,16384,0.06930452982584635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,65536,0.23575253486633302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,65536,0.4247018814086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,768,0.01055680016676585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,12288,0.0852661371231079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,12288,0.058373332023620605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,12288,0.054426666100819907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,10240,0.07377813657124838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,10240,0.04955733219782511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,8192,0.05325226783752442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,512,0.008332799871762593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,8192,0.04352000157038371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,7168,0.04628906647364299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,7168,0.03968000014623006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,16384,256,0.00735040009021759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,6144,0.04064106543858846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,6144,0.03619306484858195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,5120,0.03487253189086914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,5120,0.03300266663233439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,4096,0.029270400603612263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,4096,0.02943039933840434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,4096,0.021810134251912437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,3584,0.02571093241373698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,65536,0.25825279553731284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,3584,0.02768639922142029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,16384,0.06883947054545084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,3072,0.022849067052205404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,3072,0.025465599695841473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,2560,0.019629865884780884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,10240,0.044164268175760905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,2560,0.02338026762008667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,2560,0.015877333283424378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,8192,0.03783786694208781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,2048,0.0164000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,2048,0.022242132822672525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,7168,0.032679466406504314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,1536,0.012935466567675271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,1536,0.02067626714706421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,1024,0.009629866480827332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,6144,0.028331732749938963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,1024,0.01920106609662374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,1024,0.009799466530481974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,768,0.008257066706816356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,5120,0.025677865743637084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,768,0.018175999323527016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,768,0.008750933408737182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,512,0.007321600119272869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,512,0.015966932972272238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,512,0.0072522665063540145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,256,0.004487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,3584,0.020497065782546998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,256,0.01560640037059784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,128,0.003882666677236557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,128,0.015038933356602988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,128,0.0058794667323430385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,3072,0.017552000284194947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,64,0.015245866775512696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,32,0.0036533333361148832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,12288,64,0.0036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,12288,32,0.015500799814860026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,2048,0.01336853305498759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,65536,0.21152639389038086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,65536,0.3635306676228841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,16384,0.09820799827575684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,65536,0.24932799339294434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,1536,0.011562666296958924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,12288,0.08542613188425699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,16384,0.0722378651301066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,12288,0.05048640171686808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,12288,0.050188799699147545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,10240,0.0678335984547933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,10240,0.04118826786677043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,10240,0.045365333557128906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,8192,0.05384853283564249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,8192,0.03969493309656779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,7168,0.03972053527832031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,7168,0.03629333178202311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,6144,0.03512853384017944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,6144,0.026165332396825152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,6144,0.03337173461914063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,5120,0.03128640055656433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,5120,0.030521599451700847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,12288,256,0.006343466540177663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,4096,0.024972800413767496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,4096,0.027281065781911213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,3584,0.02258346676826477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,3584,0.025579732656478883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,3072,0.01991893251736959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,3072,0.02414399981498718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,3072,0.016438399751981102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,2560,0.017879466215769448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,2560,0.022795732816060385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,2048,0.014808533589045205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,16384,0.06434666713078817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,2048,0.021640533208847047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,2048,0.012601600090662638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,1536,0.011653332908948263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,1536,0.020143999656041463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,8192,0.03474453290303548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,1024,0.009226666887601216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,1024,0.009149866302808125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,1024,0.0188810666402181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,7168,0.030881067117055256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,768,0.007833600044250488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,768,0.016992000738779704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,768,0.007709866762161255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,512,0.006683733562628429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,512,0.016289066274960837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,512,0.006680533289909363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,5120,0.023478400707244874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,256,0.0045162667830785114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,256,0.015571199854214988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,4096,0.02003306746482849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,128,0.003841066608826319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,128,0.015345066785812378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,128,0.005500799914201101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,64,0.003455999990304311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,3584,0.018312533696492515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,64,0.01516480048497518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,10240,32,0.0036831999818483984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,10240,32,0.015173332889874777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,2560,0.014621866742769876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,65536,0.2750048001607259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,65536,0.17840107282002765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,16384,0.07323520183563233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,16384,0.0537280003229777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,12288,0.06311893463134766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,12288,0.04452053308486938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,10240,0.044913065433502194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,1536,0.010754133264223736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,10240,0.039538133144378665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,8192,0.03713173468907674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,8192,0.03545386791229248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,7168,0.032434133688608806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,7168,0.03257493376731872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,6144,0.02866986592610677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,6144,0.02994026740392049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,10240,256,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,5120,0.02455893357594808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,5120,0.02709226608276367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,16384,0.05641813278198242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,65536,0.2142730712890625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,4096,0.020920532941818237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,4096,0.024614399671554564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,4096,0.01816640098889669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,10240,0.03679146766662598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,3584,0.01882666746775309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,8192,0.03258133331934611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,3584,0.023471999168395995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,3072,0.01651306649049123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,3072,0.02230506738026937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,2560,0.014150399963061014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,7168,0.02781333327293396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,2560,0.021177599827448525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,2560,0.013707733154296875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,6144,0.023639466365178427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,2048,0.011845333377520244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,2048,0.019598933060963948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,1536,0.009699199597040813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,1536,0.01877546707789103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,5120,0.021132800976435342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,1536,0.010645332932472228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,1024,0.007582933207352956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,1024,0.016375466187795003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,768,0.006589866677920024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,768,0.016165332992871602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,12288,0.04340266784032186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,512,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,512,0.015915733575820924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,256,0.003982933362325033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,256,0.015331199765205384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,128,0.0035146666069825493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,3584,0.017128533124923705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,3072,0.015390933553377787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,128,0.014974932869275412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,64,0.003173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,64,0.01532586713631948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,8192,32,0.003492266684770584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,8192,32,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,2048,0.012141866485277812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,65536,0.2576181411743164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,65536,0.16013867060343426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,16384,0.07353920141855876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,16384,0.04881706635157267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,1024,0.008488532900810242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,12288,0.04577813148498535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,768,0.007638399799664815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,12288,0.04052053292592366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,10240,0.03984640041987102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,10240,0.03685119946797689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,512,0.006916266679763794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,8192,0.03298986752827962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,8192,0.03291093309720357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,256,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,7168,0.028655999898910524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,7168,0.030667734146118165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,8192,128,0.005718400080998739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,6144,0.025203200181325276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,6144,0.027903999884923297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,6144,0.023389865954717003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,5120,0.021921066443125407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,5120,0.02574719985326131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,4096,0.018956800301869713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,4096,0.023373866081237794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,65536,0.21294506390889487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,3584,0.017780266205469766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,16384,0.05922453403472901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,3584,0.022155733903249104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,12288,0.043619199593861895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,3072,0.014618666966756186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,3072,0.021461333831151327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,10240,0.036263465881347656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,2560,0.012556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,2560,0.020518400271733604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,8192,0.031779199838638306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,2048,0.010571733117103577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,2048,0.019314134120941163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,1536,0.008754133184750875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,7168,0.027848533789316815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,1536,0.018241065740585326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,1536,0.009942400455474853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,1024,0.007022933165232341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,1024,0.01586026648680369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,768,0.00602346658706665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,5120,0.02097919980684916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,768,0.015749333302179973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,512,0.004350933432579041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,4096,0.018016000588734947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,512,0.015713066856066386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,256,0.003730133424202601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,3584,0.016134400169054666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,256,0.015470932920773825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,3072,0.014653866489728292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,128,0.003453866640726725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,128,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,64,0.0032992000381151833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,2560,0.013166933258374532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,64,0.014991999665896098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,7168,32,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,7168,32,0.015148799618085226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,2048,0.011245866616566975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,65536,0.20699092547098794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,65536,0.1382559935251872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,16384,0.065557332833608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,1024,0.00841813286145528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,16384,0.045390931765238444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,12288,0.04769386847813924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,768,0.0071050668756167095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,12288,0.03771413167317708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,10240,0.040151464939117434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,512,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,10240,0.03440106709798177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,8192,0.02904319961865743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,256,0.005756799876689911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,8192,0.03113493323326111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,8192,0.03172159989674886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,7168,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,7168,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,7168,128,0.005302399893601735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,7168,0.026828799645106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,6144,0.022683733701705934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,6144,0.026412800947825117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,5120,0.019900800784428914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,5120,0.024345600605010988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,4096,0.01663040022055308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,4096,0.022499199708302817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,65536,0.20458985964457194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,16384,0.057657599449157715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,3584,0.01511679987112681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,3584,0.02119999925295512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,3072,0.013099732995033263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,12288,0.04002453486124675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,3072,0.020734934012095134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,2560,0.011224533120791119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,2560,0.0196288009484609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,10240,0.03444799979527791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,2048,0.0095551997423172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,2048,0.018284799655278523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,1536,0.008106666803359985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,1536,0.01769066651662191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,6144,0.022832000255584718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,1024,0.006525866687297821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,1024,0.01609386702378591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,4096,0.01697173317273458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,768,0.005564799904823304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,3584,0.015849600235621132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,768,0.016105600198109946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,512,0.004281599819660187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,3072,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,512,0.015373866756757101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,2560,0.012456533312797547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,256,0.003735466549793879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,256,0.014932266871134438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,256,0.0053941334287325535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,2048,0.010675199826558431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,128,0.0035018667578697203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,128,0.014729600151379904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,64,0.0032170665760835014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,64,0.014906666676203408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,6144,32,0.0032992000381151833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,5120,0.019832533597946168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,1536,0.00960640013217926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,6144,32,0.015001599987347921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,65536,0.18224426905314128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,65536,0.12206506729125977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,1024,0.007592533528804779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,16384,0.05122133493423462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,16384,0.0455946683883667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,12288,0.04103893438975016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,12288,0.03532586495081584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,10240,0.03578986724217732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,10240,0.033225599924723306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,8192,0.03025919993718465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,768,0.006840533514817555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,8192,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,7168,0.027552000681559247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,7168,0.02658240000406901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,512,0.006157866617043813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,7168,0.025544534126917522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,6144,128,0.005123200019200643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,6144,0.020386133591334024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,6144,0.0245578666528066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,5120,0.017908267180124917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,5120,0.023128533363342287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,4096,0.014765866597493491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,4096,0.021617066860198975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,65536,0.19204373359680177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,3584,0.014012799660364787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,12288,0.03911893367767334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,3584,0.02082560062408447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,10240,0.033405868212382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,3072,0.011766399939854939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,3072,0.020117332537968956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,2560,0.010307199756304423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,8192,0.02908693353335063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,2560,0.019308799505233766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,2048,0.008890666564305623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,2048,0.018838399648666383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,2048,0.010488532980283101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,6144,0.021747199694315593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,1536,0.007525333265463512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,1536,0.016566399733225504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,5120,0.019335466623306274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,1024,0.006353066861629486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,16384,0.054626135031382236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,1024,0.016734933853149413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,768,0.004837333162625631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,4096,0.016528000434239708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,768,0.016377600034077962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,512,0.004158933212359746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,512,0.01574186682701111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,3584,0.014948266744613647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,512,0.006111999849478403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,256,0.0036906667053699495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,256,0.015050666530927024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,256,0.005573333303133646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,3072,0.01360640029112498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,128,0.0033728001018365227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,128,0.01461120049158732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,64,0.003099733342727025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,2560,0.012244266271591187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,5120,32,0.0032479998966058097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,64,0.014897066354751586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,5120,32,0.014707199732462563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,65536,0.14629653294881184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,65536,0.10424213409423828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,16384,0.045362134774525956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,1536,0.008937600255012512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,16384,0.039655466874440506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,12288,0.0335264007250468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,12288,0.034320000807444254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,10240,0.02876693407694499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,10240,0.031565866867701214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,768,0.006728533407052357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,8192,0.024694399038950602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,8192,0.028654932975769043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,128,0.005030400057633718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,7168,0.022317866484324135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,7168,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,65536,0.18624213536580403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,6144,0.020188800493876138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,16384,0.047432533899943036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,5120,1024,0.007643733421961467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,6144,0.02427519957224528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,12288,0.03656853437423706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,5120,0.017940266927083334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,5120,0.021713066101074218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,5120,0.01781439979871114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,4096,0.015148799618085226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,4096,0.02055466572443644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,10240,0.029411200682322187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,3584,0.013123200337092081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,3584,0.019789866606394448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,3072,0.011771733562151592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,3072,0.0195360004901886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,8192,0.026610134045283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,2560,0.008827733000119527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,2560,0.01871573328971863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,2560,0.01160426636536916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,2048,0.00772159993648529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,2048,0.017145599921544394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,1536,0.006695466736952464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,1536,0.01604159971078237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,1024,0.005055999755859375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,7168,0.023371734221776328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,1024,0.015867732961972556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,768,0.004390400151411692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,768,0.015452800194422403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,6144,0.020312533775965372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,512,0.0039061332742373147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,512,0.015307733416557312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,4096,0.015347199638684592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,256,0.0035125332574049628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,3584,0.014409599701563516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,256,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,128,0.003218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,128,0.014654933412869772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,2048,0.009829333424568177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,64,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,1536,0.008389332890510559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,64,0.01502293348312378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,4096,32,0.003402666747570038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,4096,32,0.01507306694984436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,1024,0.0072053333123524976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,65536,0.1334762732187907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,65536,0.09970986843109131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,16384,0.04080959955851237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,16384,0.03723733425140381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,12288,0.02881493369738261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,12288,0.03237760066986084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,512,0.0059445331494013464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,3072,0.013175466656684875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,10240,0.02566293279329936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,10240,0.030612266063690184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,256,0.00553173323472341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,8192,0.02213653326034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,8192,0.026162133614222212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,7168,0.019831466674804687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,128,0.0050805335243542995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,7168,0.024358399709065757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,7168,0.02336853345235189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,6144,0.017912532885869345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,6144,0.023517866929372154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,4096,768,0.006696533163388569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,65536,0.15795733133951823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,5120,0.015892266233762106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,16384,0.044402134418487546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,5120,0.02100800077120463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,4096,0.012591999769210816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,4096,0.019913599888483683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,3584,0.011352533102035522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,12288,0.03419306675593058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,3584,0.01979093352953593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,10240,0.02948906620343526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,3072,0.010305066903432209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,3072,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,3072,0.018703999121983846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,2560,0.00844586690266927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,2560,0.018016000588734947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,2048,0.007359999914964039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,2048,0.017046399911244712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,6144,0.019248000780741372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,1536,0.006571733454863231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,1536,0.016032000382741295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,1024,0.004990933338801066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,1024,0.015997866789499916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,4096,0.014661332964897156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,768,0.004215466479460398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,8192,0.025234133005142212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,3584,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,768,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,512,0.0038165333370367683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,512,0.01536853313446045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,256,0.0034282666941483817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,2560,0.011008000373840332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,256,0.014738133549690247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,128,0.0031541332602500914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,2048,0.009278933207194012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,128,0.014562132954597472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,64,0.002948266764481862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,1536,0.008170666793982189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,64,0.014578133821487427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3584,32,0.003110400090614955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3584,32,0.01470080018043518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,5120,0.017032533884048462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,65536,0.11275946299235026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,1024,0.006916266679763794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,65536,0.09040106932322184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,16384,0.0368010679880778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,16384,0.035596799850463864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,65536,0.17757333119710286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,12288,0.025948800643285114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,12288,0.03022186756134033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,10240,0.023033599058787026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,10240,0.02746559977531433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,10240,0.031100799640019734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,8192,0.020195200045903524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,8192,0.02503146727879842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,512,0.005807999769846598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,7168,0.01778986652692159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,7168,0.02435093323389689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,256,0.005385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,6144,0.015969066818555196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,6144,0.023285333315531412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,128,0.005061333378156027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,5120,0.014386133352915446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,5120,0.02122559944788615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,4096,0.012213333447774252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,4096,0.019511467218399046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,16384,0.04913813273111979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3584,768,0.0065087998906771345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,3584,0.01160533328851064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,3584,0.019323732455571493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,3072,0.009187199672063192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,8192,0.026394667228062947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,3072,0.018662399053573607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,2560,0.008213333288828532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,7168,0.024660267432530723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,2560,0.017310933272043864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,2560,0.011014399925867717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,6144,0.020076799392700195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,2048,0.007394133508205414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,2048,0.01653333306312561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,2048,0.009468799829483033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,1536,0.006090666850407918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,1536,0.016134400169054666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,5120,0.01796906590461731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,1024,0.004878933231035868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,1024,0.015522133310635886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,12288,0.03635840018590291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,4096,0.015169066190719605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,768,0.004353066782156626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,768,0.015256533026695251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,512,0.003885866701602936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,512,0.015233066678047181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,256,0.0033589333295822145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,256,0.014838400483131408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,3584,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,128,0.0032255999743938447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,128,0.014475733041763306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,64,0.003013333429892858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,3072,0.012877866625785828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,64,0.014663466811180114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,3072,32,0.0029898665845394133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,3072,32,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,65536,0.09590506553649902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,65536,0.08133119742075602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,1536,0.008269866804281871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,16384,0.028065067529678345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,1024,0.007163733243942261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,16384,0.03193813363711039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,768,0.006552533308664958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,16384,0.04582293430964152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,12288,0.0224671999613444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,12288,0.027536000808080035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,10240,0.01970133384068807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,10240,0.0258026659488678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,512,0.00584853341182073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,8192,0.01690346598625183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,256,0.005388799806435903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,8192,0.024286933739980063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,7168,0.015204266707102457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,7168,0.022873600323994957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,6144,0.013521066308021546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,6144,0.02196906606356303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,5120,0.013168000181516013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,5120,0.020907733837763467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,65536,0.15537919998168945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,4096,0.011100799838701884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,4096,0.019784533977508546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,12288,0.03376106818517049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,3584,0.010309333602587383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,10240,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,3072,128,0.005050666630268097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,3584,0.01876693367958069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,8192,0.02630400061607361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,3072,0.009646933277448018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,7168,0.022634667158126832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,3072,0.017528533935546875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,2560,0.008011733492215473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,2560,0.017059199015299478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,2048,0.007314133147398631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,2048,0.016521599888801575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,6144,0.018589866161346436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,1536,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,1536,0.01591679950555166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,1024,0.004598399996757508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,1024,0.01548373301823934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,5120,0.01611733337243398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,768,0.004148266712824504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,768,0.01556373337904612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,4096,0.014261333147684732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,512,0.0036981334288915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,512,0.015195733308792115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,3584,0.012328533331553142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,256,0.0034634667138258614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,3072,0.01121493379275004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,256,0.01495039959748586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,2560,0.00983893374602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,128,0.0032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,128,0.014493866761525472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,64,0.002948266764481862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,64,0.014575999975204468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,1536,0.007740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2560,32,0.002924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2560,32,0.01456106702486674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,65536,0.07527360121409098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,1024,0.006854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,65536,0.071124267578125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,16384,0.024177066485087075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,16384,0.02893120050430298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,768,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,12288,0.01923840045928955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,2048,0.008748799562454224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,12288,0.02489173412322998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,10240,0.016754132509231568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,10240,0.023735467592875162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,512,0.005702400207519531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,8192,0.014089600245157877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,8192,0.0232586661974589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,7168,0.012930132945378623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,256,0.005447466671466827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,7168,0.02238933245340983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2560,128,0.004940799872080485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,6144,0.0116565336783727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,6144,0.0208512008190155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,5120,0.011346133550008138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,5120,0.020106667280197145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,16384,0.04219520092010498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,12288,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,4096,0.009946667154630025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,4096,0.019802665710449217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,10240,0.02764799992243449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,3584,0.008992000420888265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,3584,0.018504534165064493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,3584,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,3072,0.008263466755549113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,8192,0.024231467644373575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,3072,0.017505067586898803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,2560,0.007681066791216533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,65536,0.16226132710774738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,2560,0.01725546717643738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,2048,0.006696533163388569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,7168,0.021296000480651854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,2048,0.01648319959640503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,2048,0.009164800246556599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,6144,0.01844053268432617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,1536,0.005456000069777171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,1536,0.01620693306128184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,1024,0.004358399907747904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,1024,0.015687466661135355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,768,0.00405973345041275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,5120,0.01616853376229604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,768,0.015459199746449789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,512,0.003638399889071783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,512,0.015025066335995993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,4096,0.014265599846839904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,256,0.0033258666594823206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,256,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,3072,0.011664000153541566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,128,0.0031136001149813337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,2560,0.010309333602587383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,128,0.014419200023015341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,128,0.004902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,64,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,64,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,2048,32,0.002962133288383484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,2048,32,0.014612266421318054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,1536,0.008158933122952778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,65536,0.06113066673278809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,1024,0.006804266571998596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,65536,0.06374826828638712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,16384,0.019886932770411172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,16384,0.02563199996948242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,768,0.006203733384609222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,12288,0.015482667088508605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,12288,0.023215999205907188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,10240,0.013240533073743186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,10240,0.025405865907669068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,512,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,8192,0.011351466178894043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,8192,0.02270080049832662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,7168,0.01055999994277954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,7168,0.022316799561182658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,6144,0.009958400328954061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,6144,0.020980266729990642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,2048,256,0.005205333232879639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,5120,0.011084799965222675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,5120,0.01956160068511963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,65536,0.13025813102722167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,4096,0.009588266412417095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,16384,0.03676053285598755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,12288,0.028065067529678345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,4096,0.0182751993338267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,3584,0.008929066856702169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,3584,0.018077866236368815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,10240,0.023539199431737264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,3072,0.008083199958006541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,8192,0.022293333212534586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,3072,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,2560,0.006820266445477803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,7168,0.018658133347829182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,2560,0.017086933056513466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,2048,0.005738666653633118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,2048,0.01618880033493042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,1536,0.005019733309745788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,6144,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,1536,0.01598186691602071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,1024,0.004296533266703288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,5120,0.01477226714293162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,1024,0.015405866503715514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,768,0.003949866692225138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,768,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,4096,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,512,0.00365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,3584,0.011686399579048157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,512,0.01495146652062734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,3072,0.010571733117103577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,256,0.0032458665470282235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,256,0.014504533012708029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,2560,0.008964266379674275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,128,0.002932266642649968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,128,0.014346667130788169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,2048,0.007919999957084655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,64,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1536,32,0.002958933264017105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,64,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,1536,0.007277866701285045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1536,32,0.01453013320763906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,65536,0.0409610668818156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,65536,0.05313813289006551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,16384,0.014435199896494546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,1024,0.006347733239332835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,16384,0.025111466646194458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,16384,0.030774400631586714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,12288,0.011740799744923909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,12288,0.023257599274317423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,12288,0.02412266731262207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,10240,0.010713600118954976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,768,0.005847466488679251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,10240,0.022107734282811483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,8192,0.011617066462834676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,8192,0.02039360006650289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,7168,0.01045973300933838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,7168,0.02004693349202474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,512,0.0053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,6144,0.009810133775075277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,6144,0.019345066944758096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,256,0.004939733445644379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1536,128,0.004822400212287903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,5120,0.009091200431187947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,5120,0.019488000869750978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,4096,0.008462933699289958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,4096,0.01838399966557821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,65536,0.11365760167439778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,3584,0.007749333480993907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,3584,0.017723733186721803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,3584,0.009460266431172688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,3072,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,10240,0.02057173252105713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,3072,0.0175872008005778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,8192,0.017382399241129557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,2560,0.006387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,2560,0.01716053287188212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,2048,0.005640533566474914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,7168,0.014491732915242514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,2048,0.01633386711279551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,1536,0.0049898669123649595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,1536,0.01575040022532145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,6144,0.013119999567667642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,1024,0.004370133578777313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,1024,0.01554026703039805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,1024,0.0058773333827654515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,768,0.003977599988381068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,5120,0.011613866686820984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,768,0.015304533640543619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,512,0.0035189333061377203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,512,0.01483626663684845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,4096,0.010099200407663982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,256,0.0032032000521818793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,256,0.014633599917093912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,256,0.004987733562787374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,128,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,128,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,64,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,3072,0.008424533406893413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,64,0.014337066809336343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,1024,32,0.0027722666660944624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,2560,0.007963733375072479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,1024,32,0.014379733800888061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,65536,0.03547093470891317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,65536,0.04895999828974406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,16384,0.011315199732780456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,2048,0.007179733117421467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,16384,0.022999467452367146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,12288,0.01097706655661265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,1536,0.00680320014556249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,12288,0.022405334313710532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,10240,0.010039466619491576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,10240,0.020637865861256918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,10240,0.018692266941070557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,768,0.005514666438102722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,8192,0.010609066486358643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,8192,0.01983786622683207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,7168,0.008898133039474487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,512,0.0051584000388781226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,7168,0.01959679921468099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,6144,0.009009066224098205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,6144,0.01909439961115519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,5120,0.007673599819342296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,1024,128,0.0047189335028330484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,5120,0.019144533077875774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,4096,0.006746666630109151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,4096,0.01827413241068522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,65536,0.10325866540273029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,3584,0.007053866485754649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,16384,0.02807253400484721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,3584,0.017903999487559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,12288,0.020860799153645835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,3072,0.00655680000782013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,3072,0.01686613361040751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,3072,0.008329600095748901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,2560,0.00621973325808843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,2560,0.016910932461420693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,2560,0.007964799801508587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,8192,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,2048,0.005523199836413065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,2048,0.01646506687005361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,7168,0.013645866513252258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,6144,0.012397866447766621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,1536,0.004923733572165171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,1536,0.015662933389345803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,1024,0.00415786678592364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,1024,0.015159466862678527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,1024,0.005849599838256836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,5120,0.010851200421651204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,768,0.0038090666135152185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,768,0.01488746702671051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,512,0.003403733422358831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,512,0.014773333072662353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,512,0.005157333115736643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,4096,0.009567999839782714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,256,0.003047466774781545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,256,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,256,0.004840533435344696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,128,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,128,0.014318933089574179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,3584,0.009258666634559631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,64,0.002696533252795537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,64,0.014152533809343972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,768,32,0.0027615999182065325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,768,32,0.014514133334159851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,65536,0.025703465938568114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,65536,0.04528533220291138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,65536,0.09236906369527181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,16384,0.011426132917404175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,16384,0.021448532740275063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,2048,0.007020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,12288,0.009690666198730468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,12288,0.01917653282483419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,1536,0.006679466863473256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,10240,0.00885973374048869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,10240,0.019689599672953285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,10240,0.01718613306681315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,8192,0.007946666578451793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,8192,0.019112533330917357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,7168,0.007484800120194752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,768,0.005323733389377594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,7168,0.01938026746114095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,6144,0.006910933554172516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,6144,0.01880853374799093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,6144,0.012235732873280843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,5120,0.006785066425800323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,5120,0.019322667519251505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,5120,0.010613333185513813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,768,128,0.004648533463478088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,4096,0.018032000462214152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,4096,0.006357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,4096,0.009381332993507385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,3584,0.006678399940331777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,3584,0.017476266622543334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,3584,0.009195733070373534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,3072,0.006367999811967213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,3072,0.01702079971631368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,2560,0.006154666841030121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,2560,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,2560,0.00787306676308314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,16384,0.025910399357477826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,2048,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,2048,0.016155733664830526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,12288,0.020097066958745323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,2048,0.007048533360163371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,1536,0.004795733094215393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,1536,0.01558080017566681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,1536,0.006493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,1024,0.004240000247955322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,8192,0.014868266383806863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,7168,0.013482667009035745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,1024,0.015200000007947287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,1024,0.005779199798901876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,768,0.0038442666331926978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,768,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,512,0.0035455999275048576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,512,0.014703999956448874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,512,0.004966400067011515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,128,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,256,0.0031040000418821974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,256,0.014570666352907815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,256,0.004808533191680908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,128,0.014220800002415976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,128,0.00468800018231074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,64,0.002661333233118057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,64,0.01430506706237793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,512,32,0.002845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,512,32,0.014311466614405313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,3072,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,16384,0.020786132415135702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,65536,0.018939733505249023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,12288,0.00716480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,65536,0.036817065874735516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,16384,0.008172800143559773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,16384,0.024312533934911094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,12288,0.019527467091878255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,10240,0.006730666756629944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,10240,0.02012373407681783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,8192,0.01885653336842855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,10240,0.01684160033861796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,8192,0.0065194666385650635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,8192,0.01469013293584188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,512,768,0.00528959979613622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,7168,0.006445866823196411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,7168,0.01952426632245382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,5120,0.006538666784763336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,6144,0.0061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,6144,0.018465065956115724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,6144,0.01209493378798167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,5120,0.019038933515548705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,4096,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,4096,0.01802133321762085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,4096,0.009444266557693481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,3584,0.006794666747252147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,3584,0.017406932512919106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,65536,0.08759786287943522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,3072,0.006364800035953522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,3072,0.017138133446375527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,2560,0.006098133325576782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,12288,0.01957546671231588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,2560,0.017177599668502807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,2048,0.0054613331953684485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,2048,0.016293332974116007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,1536,0.004844800134499868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,1536,0.015922133127848306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,7168,0.01328426698843638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,1024,0.004098133246103922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,1024,0.015398400028546652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,1024,0.005709866682688395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,5120,0.010547199845314026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,768,0.0037280000746250153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,768,0.015128533045450846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,768,0.00526506652434667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,512,0.0033632000287373864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,512,0.014893866578737893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,512,0.005032533407211303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,3584,0.009087999661763508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,256,0.0030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,256,0.014250666896502177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,3072,0.008278400202592214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,128,0.002867199977238973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,128,0.014169599612553915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,128,0.004455466568470001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,2560,0.007773866752783458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,32,0.002665599932273229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,256,64,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,64,0.013946666320164999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,256,32,0.014430933197339377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,2048,0.006859733164310456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,65536,0.01076479951540629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,65536,0.03300266663233439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,16384,0.006495999793211619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,16384,0.024641066789627075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,65536,0.08800426324208578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,16384,0.02015893260637919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,12288,0.006297599772612255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,12288,0.019721599419911702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,10240,0.006583466629187266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,10240,0.01938026746114095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,10240,0.01694933374722799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,8192,0.006457599997520447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,8192,0.018634666999181114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,8192,0.014657066265741984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,7168,0.006262399752934774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,7168,0.01882879932721456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,6144,0.006121600170930227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,6144,0.01862293283144633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,5120,0.006424533327420552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,256,0.004779733220736186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,5120,0.018931200106938682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,5120,0.010730666915575664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,4096,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,4096,0.018389334281285606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,4096,0.009318400422732036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,3584,0.006532266736030579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,3584,0.017433599630991618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,3072,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,3072,0.017534933487574258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,256,1536,0.006680533289909363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,3072,0.008101333181063335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,2560,0.006075733403364817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,2560,0.01634239951769511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,2048,0.005460266768932342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,2048,0.016207999984423318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,12288,0.019645865758260092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,1536,0.004774400095144907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,1536,0.015451733271280924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,1024,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,1024,0.015356799960136414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,1024,0.005686399837334951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,768,0.0036117332677046456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,7168,0.01340053379535675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,768,0.016053332885106405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,768,0.005358933409055074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,512,0.0034058667719364167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,512,0.01470186710357666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,6144,0.012019200126330058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,256,0.0029728000362714132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,256,0.01458453337351481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,128,0.002809600035349528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,128,0.01418239971001943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,128,0.004647466540336609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,64,0.0027104000250498454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,3584,0.009032533566157023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,64,0.014339199662208557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,128,32,0.0028064000109831494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,128,32,0.01415786643822988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,65536,0.00904319981733958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,65536,0.031146667400995892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,16384,0.006177066763242086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,16384,0.019798400004704793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,12288,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,2048,0.00694400022427241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,12288,0.020632533232371013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,10240,0.00624533345301946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,8192,0.00619946668545405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,10240,0.019356799125671387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,1536,0.00653653343518575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,8192,0.018921599785486857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,7168,0.006049066781997681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,7168,0.019578667481740315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,6144,0.006033066908518473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,6144,0.018168532848358156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,5120,0.00633493314186732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,5120,0.018407466014226277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,4096,0.005896533528963724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,512,0.005125333368778229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,4096,0.01787839929262797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,3584,0.006397866706053417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,3584,0.017655466000239053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,3072,0.006074666480223338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,2560,0.01618666648864746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,256,0.00477866679430008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,3072,0.017644800742467246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,2560,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,2048,0.005409066875775656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,2048,0.015957333644231162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,1536,0.004681600133577982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,96,128,2560,0.007799466451009114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,1536,0.015642666816711427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,1024,0.003985066711902618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,1024,0.015042133132616677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,768,0.0036415999134381616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,768,0.014913066228230795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,512,0.0032661333680152893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,512,0.01472106675306956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,256,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,256,0.014567466576894126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,128,0.00276053324341774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,128,0.014338133732477823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,64,0.0025909334421157838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,64,0.014181333780288696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,64,32,0.0027146667242050173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,64,32,0.014254933595657349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,12288,0.019186133146286012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,65536,0.009012266993522644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,65536,0.03110719919204712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,16384,0.00621973325808843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,16384,0.020682666699091592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,12288,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,10240,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,10240,0.019942400852839152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,6144,0.018488534291585288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,8192,0.00618453323841095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,8192,0.018154666821161906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,7168,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,4096,0.017602133750915527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,7168,0.018644267320632936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,6144,0.005939200023810069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,5120,0.006325333317120869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,5120,0.018582399686177573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,4096,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,3584,0.006372266511122386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,3584,0.017638399203618368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,3072,0.006021333237489065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,3072,0.01720213294029236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,2560,0.006161066889762879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,2560,0.01648640036582947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,2048,0.00544213354587555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,2048,0.015965867042541503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,1536,0.004737066725889841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,1536,0.015597866972287497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,1024,0.004010666658480962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,1024,0.01514240006605784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,768,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,768,0.01492693324883779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,512,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,512,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,256,0.0030069333811601003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,256,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,128,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,128,0.013992533087730408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,64,0.002570666621128718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,64,0.014184533556302389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,96,32,32,0.0025802666942278544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,96,32,32,0.014229333400726319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,16384,0.3025418599446615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,16384,0.5666751861572266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,12288,0.23297386169433593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,12288,0.4285418510437012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,12288,0.24867413838704427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,10240,0.35853439966837564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,10240,0.19925333658854166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,10240,0.20335680643717446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,8192,0.16239892641703288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,8192,0.2978111902872721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,7168,0.25477546056111655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,7168,0.14918293952941894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,7168,0.14739306767781574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,6144,0.21876160303751627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,6144,0.1276256004969279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,5120,0.18528960545857748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,5120,0.10847787062327068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,5120,0.11002986431121826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,4096,0.0903167963027954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,4096,0.15600426991780597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,3584,0.13055893580118816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,4096,0.08969386418660483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,3584,0.08164587020874023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,3584,0.07789866924285889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,3072,0.1145407994588216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,3072,0.06826879978179931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,3072,0.07341226736704508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,2560,0.09536853631337484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,2560,0.0636949340502421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,2560,0.05866133371988932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,2048,0.07742613156636556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,2048,0.05559466679890951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,2048,0.048571733633677165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,16384,0.3154357274373373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,1536,0.060242132345835364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,1536,0.04622720082600911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,1024,0.04388800064722697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,1024,0.029099732637405396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,1024,0.039290666580200195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,768,0.03398826519648234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,8192,0.17221973737080892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,768,0.032407466570536295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,768,0.02746880054473877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,512,0.02484053373336792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,512,0.026654932896296186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,6144,0.12834453582763672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,256,0.014289066195487976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,256,0.021406932671864828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,256,0.017011199394861856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,128,0.009723732868830364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,128,0.020011732975641884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,64,0.018227199713389076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,128,0.015270400047302245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,64,0.0075552001595497135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,65536,32,0.007038933535416921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,65536,32,0.018897066513697304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,16384,0.1290453354517619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,16384,0.08566293716430665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,16384,0.0853877305984497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,65536,0.5724576314290364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,65536,0.29954134623209633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,1536,0.03889919916788737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,65536,0.3142677307128906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,12288,0.09824426968892416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,12288,0.06642239888509115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,12288,0.06871999899546305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,10240,0.0927071968714396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,10240,0.05609920024871826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,10240,0.06292266845703125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,7168,0.06100373268127442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,8192,0.07474986712137857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,8192,0.04642453193664551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,8192,0.051178665955861416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,7168,0.04102186759312947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,65536,512,0.02069546580314636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,7168,0.04567466576894124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,6144,0.05155413150787354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,6144,0.04266133308410645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,6144,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,5120,0.04383999903996785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,5120,0.03779093424479167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,5120,0.0308405339717865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,4096,0.036414933204650876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,4096,0.03352320194244385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,4096,0.026552534103393553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,3584,0.03327359954516093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,3584,0.03164693315823873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,3584,0.023974400758743287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,3072,0.02832746704419454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,3072,0.028585600852966308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,3072,0.021978666385014854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,2560,0.024285866816838583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,2560,0.026526933908462523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,2560,0.019654399156570433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,2048,0.020383999745051066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,2048,0.024207999308904014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,2048,0.01681813398996989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,1536,0.016383999586105348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,1536,0.02148053248723348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,1024,0.011925333738327026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,1024,0.01984213391939799
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,1024,0.011314133803049724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,768,0.010362666845321656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,768,0.018997333447138467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,768,0.010391466816266378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,512,0.007505066692829132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,512,0.017083734273910522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,512,0.0090037335952123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,256,0.004833066463470459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,256,0.015884799758593242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,256,0.007207466661930085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,128,0.0040224000811576845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,128,0.015821866194407144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,64,0.0038442666331926978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,64,0.015682133038838704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,16384,32,0.0038783999780813852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,16384,32,0.015675733486811318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,16384,0.10787413120269776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,65536,0.2389685312906901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,16384,0.07000426451365152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,65536,0.40998185475667315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,16384,0.07616639931996663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,1536,0.013900799552599588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,65536,0.2975125312805176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,12288,0.08415253162384033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,12288,0.05943573315938314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,12288,0.05797973473866781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,10240,0.07264426549275717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,10240,0.04986773331960042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,8192,0.054656000932057705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,8192,0.04331733385721843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,7168,0.04514453411102295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,7168,0.03967680136362712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,6144,0.04138666788736979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,6144,0.035420799255371095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,5120,0.035396265983581546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,5120,0.03229973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,16384,128,0.006605866551399231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,5120,0.026971733570098876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,4096,0.028195200363794963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,4096,0.02950506607691447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,3584,0.025139200687408447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,3584,0.0276256004969279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,3072,0.02291946609814962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,3072,0.025947733720143633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,8192,0.040753066539764404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,2560,0.02005973259607951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,2560,0.023819732666015624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,7168,0.03581226666768392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,2048,0.01704960068066915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,6144,0.03177599906921387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,2048,0.014387200276056925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,2048,0.02236586610476176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,1536,0.013165866335233053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,1536,0.020538665850957236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,1536,0.012283733487129212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,4096,0.023603200912475586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,1024,0.01016960044701894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,1024,0.018556799491246542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,768,0.008594133456548055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,3584,0.02142613331476847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,768,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,512,0.006945066650708516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,512,0.01606933375199636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,512,0.007518933216730754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,256,0.004469333092371622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,3072,0.019106133778889974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,256,0.015528532862663268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,128,0.0038101332883040107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,10240,0.05062079826990763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,128,0.01495253344376882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,64,0.0034613333642482757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,12288,32,0.0036757332583268487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,64,0.015258666872978211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,12288,32,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,2560,0.017083734273910522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,65536,0.3641738573710124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,65536,0.2066805362701416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,65536,0.25934720039367676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,16384,0.09645866552988688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,16384,0.0740991989771525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,12288,0.07613120079040528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,12288,0.049504001935323075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,10240,0.06588053305943807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,1024,0.01011306643486023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,10240,0.04407999912897746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,10240,0.046349867184956865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,8192,0.05421760082244873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,768,0.008700799942016602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,8192,0.03854399919509888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,7168,0.03921066522598267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,7168,0.03528000116348266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,6144,0.03435946702957153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,256,0.006401066482067108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,6144,0.03264853358268738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,5120,0.02956586678822835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,12288,128,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,5120,0.03006826639175415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,5120,0.025520000855127973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,4096,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,4096,0.02674986720085144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,3584,0.02184106707572937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,3584,0.02526293396949768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,3584,0.019949867328008016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,3072,0.019577600558598838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,3072,0.023521065711975098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,2560,0.01747200091679891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,2560,0.022073600689570108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,12288,0.05117119948069254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,2048,0.015762133399645488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,2048,0.02058239976565043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,8192,0.03777066469192505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,7168,0.03373226722081502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,1536,0.011528533697128297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,1536,0.01996586720148722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,6144,0.029448533058166505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,1024,0.009065600236256917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,1024,0.01821119983990987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,1024,0.009475200374921163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,768,0.007826133569081625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,768,0.016323199868202208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,4096,0.02167466680208842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,512,0.006450133522351582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,16384,0.07421013514200846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,512,0.01588373382886251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,3072,0.017720532417297364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,256,0.004341333111127218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,256,0.015355733036994935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,256,0.006093866626421611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,128,0.003789866715669632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,2560,0.015590399503707886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,128,0.015005866686503092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,128,0.0054282665252685545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,64,0.0035317334036032355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,64,0.01495146652062734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,10240,32,0.0034933333595593774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,10240,32,0.014914133151372275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,2048,0.013501866658528646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,16384,0.07270932992299398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,65536,0.2733375867207845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,65536,0.1696842670440674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,1536,0.011521066228548687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,16384,0.05144960085550944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,16384,0.06340053478876749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,12288,0.05706026554107666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,12288,0.04347840150197347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,10240,0.04405119816462199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,768,0.008182399968306223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,10240,0.03856746753056844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,8192,0.03602879842122396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,8192,0.034168533484141034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,7168,0.03184746702512105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,7168,0.0317194660504659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,6144,0.02780906756718953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,6144,0.029411200682322187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,5120,0.024206932385762533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,5120,0.027079466978708906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,5120,0.023110399643580117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,4096,0.020503467321395873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,4096,0.024114133914311726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,65536,0.2332512060801188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,3584,0.020006400346755982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,3584,0.02295893430709839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,12288,0.04633920192718506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,3072,0.016199466586112977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,10240,0.03949226538340251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,3072,0.022111999988555908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,10240,512,0.007190399865309398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,8192,0.03352533181508382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,2560,0.014007467031478881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,7168,0.029806933800379437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,2048,0.011471999684969585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,2560,0.02108479936917623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,6144,0.026893866062164307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,2048,0.01967039903004964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,1536,0.009839999675750732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,1536,0.018657066424687705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,1024,0.007681066791216533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,1024,0.016082132856051125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,1024,0.009011200070381165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,768,0.006562133133411407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,768,0.016645333170890807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,512,0.005018666883309682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,4096,0.019573332866032918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,512,0.015734400351842245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,512,0.007032533486684163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,256,0.004045866678158442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,256,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,128,0.0034346667428811393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,3584,0.018692266941070557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,128,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,64,0.0032597333192825317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,3072,0.01620693306128184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,64,0.015094400445620219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,8192,32,0.0033312000334262846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,2560,0.01439786652723948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,8192,32,0.01530346671740214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,2048,0.012708266576131185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,65536,0.2615616003672282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,16384,0.06681280136108399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,65536,0.15579412778218588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,1536,0.010822400450706482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,16384,0.05008426507314047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,12288,0.04531626701354981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,12288,0.03974506855010986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,12288,0.04967679977416992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,10240,0.03919999996821086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,768,0.008146133522192638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,10240,0.035979731877644854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,8192,0.03184960087140401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,8192,0.03189866741498311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,7168,0.028382933139801024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,7168,0.029471999406814574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,7168,0.030668799082438154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,6144,0.02504853407541911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,6144,0.02741439938545227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,6144,0.027433600028355914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,128,0.005771733323733012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,5120,0.021604265769322714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,5120,0.025009065866470337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,4096,0.018531199296315512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,4096,0.02288533250490824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,65536,0.23724053700764974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,16384,0.06339413324991862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,3584,0.017539199193318686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,3584,0.02241920034090678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,3072,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,10240,0.04219199816385905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,3072,0.020809600750605263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,8192,0.03491199811299642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,2560,0.012390399972597759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,8192,256,0.006602666775385539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,2560,0.02021119991938273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,2048,0.010519466797510783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,2048,0.018874667088190713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,1536,0.008753066261609394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,5120,0.023305600881576537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,1536,0.017909334103266398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,1024,0.006967466572920482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,1024,0.01628159979979197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,768,0.006085333228111267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,4096,0.020032000541687012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,768,0.015698132912317912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,768,0.007153066496054332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,512,0.004226133227348328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,512,0.015242666999499003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,512,0.006525866687297821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,256,0.0037258667250474296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,3584,0.017858133713404337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,256,0.014905599753061929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,128,0.003357866654793421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,128,0.01453013320763906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,128,0.005206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,3072,0.016353066762288412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,64,0.0032287999987602235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,2560,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,64,0.01499626636505127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,7168,32,0.0031744000812371576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,7168,32,0.014948266744613647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,65536,0.20494400660196938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,65536,0.13477439880371095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,16384,0.05892053445180258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,1536,0.010603732864061991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,16384,0.04684906800587972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,12288,0.04629013140996297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,12288,0.03717653354008992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,1024,0.00842026670773824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,10240,0.04052373170852661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,10240,0.033190399408340454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,8192,0.028221867481867474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,8192,0.02982826630274455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,7168,0.024869332710901894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,7168,0.027868799368540448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,256,0.0057087997595469155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,6144,0.022155733903249104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,6144,0.025305600961049397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,7168,2048,0.012502400080362954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,5120,0.019362133741378785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,5120,0.023806933561960855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,65536,0.23053013483683266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,16384,0.06303253173828124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,4096,0.017968000968297322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,12288,0.0445087989171346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,4096,0.021684267123540244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,3584,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,10240,0.03988266785939534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,3584,0.021451733509699502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,3072,0.012638933459917703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,8192,0.03300053278605143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,3072,0.02029119928677877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,2560,0.011150933305422465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,2560,0.01931519905726115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,7168,0.03004266619682312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,2048,0.009594666957855224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,2048,0.018644267320632936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,1536,0.007843199868996937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,6144,0.025946666797002155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,1536,0.017438934246699015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,1024,0.006401066482067108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,1024,0.016290133198102318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,4096,0.018923733631769815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,768,0.0052042668064435325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,768,0.01572053333123525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,3584,0.017771732807159425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,768,0.007222400108973186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,3072,0.01599360009034475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,512,0.004300799965858459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,512,0.015414399902025857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,256,0.003656533360481262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,2560,0.014077867070833841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,256,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,128,0.0033141332368055976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,128,0.01504533290863037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,2048,0.012123733758926392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,64,0.003089066594839096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,6144,32,0.0031818665564060213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,64,0.01474453310171763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,1536,0.01028053363164266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,6144,32,0.014873600006103516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,5120,0.02287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,65536,0.18321599960327148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,1024,0.008038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,65536,0.11796693007151286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,16384,0.05478613376617432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,16384,0.04495253165562947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,12288,0.04036266803741455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,12288,0.03417706489562988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,10240,0.035214932759602864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,10240,0.03185706734657288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,512,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,8192,0.029891200860341388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,256,0.005465599894523621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,8192,0.028124799331029255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,7168,0.022051199277242025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,6144,128,0.005062399804592133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,7168,0.02598080039024353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,6144,0.01964906652768453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,6144,0.02387839953104655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,65536,0.22703146934509277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,5120,0.017128533124923705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,16384,0.05890773137410482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,5120,0.022686932484308878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,12288,0.04625920057296753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,4096,0.014647466937700906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,4096,0.020662399133046468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,10240,0.039996798833211264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,3584,0.01262933313846588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,3584,0.020827732483545938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,3072,0.01146986683209737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,3072,0.019882667064666747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,8192,0.03209493358929952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,2560,0.010075733065605164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,2560,0.01890666683514913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,2560,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,2048,0.008813866972923278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,7168,0.02921280066172282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,2048,0.0180896004041036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,1536,0.007379200061162312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,6144,0.025381332635879515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,1536,0.016938666502634682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,1024,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,1024,0.016080000003178916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,1024,0.007852800190448761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,768,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,768,0.015408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,5120,0.021259733041127524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,768,0.007396266857783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,512,0.003916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,512,0.014889599879582724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,4096,0.01853013237317403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,256,0.0036799999574820197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,256,0.014735999703407287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,3584,0.01691840092341105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,128,0.0032970666885375976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,128,0.014986667037010192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,64,0.0031008000175158186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,3072,0.01548479994138082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,5120,32,0.003094399968783061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,64,0.014486400286356607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,5120,32,0.014804266889890037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,65536,0.14394133885701496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,65536,0.09941120147705078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,2048,0.011770666639010111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,16384,0.0440170685450236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,16384,0.03834773302078247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,1536,0.009875200192133586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,12288,0.032764800389607746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,12288,0.03342080116271973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,12288,0.038661332925160725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,10240,0.027742934226989747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,10240,0.030750934282938642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,8192,0.024241065979003905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,8192,0.025711999336878462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,7168,0.021489065885543824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,7168,0.024130133787790935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,7168,0.025931733846664428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,256,0.005471999943256378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,6144,0.019463467597961425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,6144,0.022989867130915324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,128,0.005095466474692027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,5120,0.01730133295059204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,5120,0.021547732750574748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,5120,0.01943999926249186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,4096,0.012778666615486146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,4096,0.020524799823760986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,65536,0.20853439966837564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,3584,0.011429333686828613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,3584,0.01990293264389038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,16384,0.052858666578928626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,3072,0.010100266337394715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,3072,0.01876373291015625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,3072,0.014311466614405313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,2560,0.00876586635907491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,2560,0.01820053259531657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,10240,0.03454933166503906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,2048,0.0076682666937510175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,2048,0.018017067511876424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,5120,512,0.006404266754786174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,8192,0.029677865902582805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,1536,0.006829866766929626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,1536,0.016723199685414632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,1024,0.005306666592756907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,1024,0.015974400440851848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,6144,0.02329813241958618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,768,0.004155733436346054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,768,0.015425067146619162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,4096,0.016872533162434897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,512,0.0037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,3584,0.015506133437156677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,512,0.015012266238530478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,256,0.003500800083080927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,2560,0.012385066350301106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,256,0.014620799819628397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,128,0.0031626666585604347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,2048,0.010557867089907328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,128,0.014432000120480857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,64,0.0030400000512599947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,1536,0.008649599552154542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,64,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,4096,32,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,4096,32,0.014851199587186179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,65536,0.1317951997121175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,65536,0.09671573638916016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,65536,0.17771414120992024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,16384,0.040158931414286295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,16384,0.0357258677482605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,12288,0.028281599283218384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,768,0.00721919983625412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,12288,0.03111039996147156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,10240,0.025044266382853193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,10240,0.028391466538111372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,8192,0.021653334299723305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,512,0.006099199752012888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,8192,0.02574933369954427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,7168,0.01932906707127889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,256,0.005491200089454651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,7168,0.024401066700617473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,128,0.005201066533724466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,6144,0.017485866943995156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,6144,0.022056533892949422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,4096,1024,0.007657599945863088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,5120,0.015373866756757101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,5120,0.019195733467737834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,5120,0.02116586764653524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,4096,0.013296000162760415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,4096,0.020219733317693077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,16384,0.04908906618754069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,3584,0.01034453312555949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,12288,0.03938239812850952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,3584,0.019268266359965005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,3584,0.015681067109107973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,3072,0.009340799848238627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,3072,0.018631466229756675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,10240,0.03155413269996643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,2560,0.008339200417200725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,2560,0.017755732933680216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,2048,0.007356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,2048,0.016581333676973977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,8192,0.028782933950424194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,1536,0.00650133341550827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,1536,0.016525866587956746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,7168,0.024341332912445068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,1024,0.004995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,1024,0.01574720044930776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,1024,0.0079434668024381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,768,0.0041354666153589886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,6144,0.022708266973495483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,768,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,512,0.0037290667494138084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,512,0.01492800017197927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,256,0.003311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,4096,0.01647040049235026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,256,0.014865066607793173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,128,0.0031167998909950255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,128,0.014763733744621277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,2560,0.01211840013662974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,2048,0.010145066181818645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,64,0.0029088000456492106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3584,32,0.002974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,64,0.014457600315411887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,1536,0.008756267031033833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3584,32,0.014651733636856078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,65536,0.108406400680542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,65536,0.08478399912516275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,16384,0.03374079863230388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,16384,0.033243733644485476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,768,0.006905599931875865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,12288,0.024953599770863852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,12288,0.02828906575838725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,512,0.0060917332768440245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,10240,0.022293333212534586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,10240,0.026341332991917925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,256,0.0054175997773806255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,8192,0.018998400370279948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,3072,0.01383786698182424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,8192,0.024035199483235677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,7168,0.01687999963760376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,7168,0.022832000255584718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3584,128,0.0051584000388781226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,6144,0.015072000026702882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,7168,0.02795093258221944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,6144,0.021959465742111207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,5120,0.013315199812253316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,5120,0.02048106590906779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,65536,0.2056938648223877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,16384,0.05481599966684977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,4096,0.011763200163841248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,4096,0.020082134008407592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,12288,0.04276373386383057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,3584,0.010937600334485372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,3584,0.0191210667292277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,10240,0.03598933219909668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,3072,0.010025599598884582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,3072,0.018577067057291667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,8192,0.03115839958190918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,2560,0.00800960014263789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,2560,0.017709867159525553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,2048,0.007271466652552287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,2048,0.016781866550445557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,1536,0.0062826668222745255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,5120,0.020584533611933388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,1536,0.016034133235613503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,1024,0.004457599918047587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,1024,0.015828266739845276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,4096,0.01771093408266703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,768,0.004110933343569437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,768,0.01546239952246348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,3584,0.015966932972272238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,512,0.003638399889071783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,3072,0.014405333002408347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,512,0.014971733093261719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,256,0.003294933338960012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,6144,0.02537493308385213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,2560,0.012782933314641318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,256,0.014468266566594442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,128,0.003050666550795237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,128,0.01427733302116394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,128,0.0050346667567888895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,64,0.0029535998900731405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,2048,0.010948266585667927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,64,0.014429866274197897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,3072,32,0.003102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,3072,32,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,1536,0.00909866690635681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,65536,0.09578879674275717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,65536,0.07718400160471597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,16384,0.02730026642481486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,16384,0.031004800399144487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,12288,0.02200746734937032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,1024,0.007876266539096833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,12288,0.02643839915593465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,10240,0.019318399826685588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,10240,0.024961066246032716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,8192,0.01636693378289541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,768,0.0071370666225751235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,8192,0.022977066040039063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,7168,0.014654933412869772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,512,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,7168,0.022425599892934165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,6144,0.013226667046546936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,6144,0.02140480081240336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,6144,0.022482132911682128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,5120,0.011961600184440613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,5120,0.02098133365313212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,65536,0.190174929300944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,16384,0.05156906843185425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,4096,0.010590933760007222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,12288,0.03935893376668294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,4096,0.019522132476170857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,3584,0.009358933568000794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,10240,0.033445334434509276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,3584,0.018962132930755615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,3072,0.008732799688975017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,3072,0.017245866854985557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,2560,0.007890133559703827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,8192,0.027956267197926838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,2560,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,3072,256,0.0055754666527112326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,2048,0.007009066641330719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,7168,0.024966400861740113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,2048,0.009893332918485005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,2048,0.01653333306312561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,1536,0.005634133517742157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,1536,0.01595200002193451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,1024,0.00447573314110438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,1024,0.015582933028539022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,768,0.00413226659099261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,5120,0.019318399826685588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,768,0.015186132987340293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,768,0.006316799918810527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,512,0.0037119999527931214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,512,0.015000533064206442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,256,0.0033482665816942847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,4096,0.016446933150291443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,256,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,256,0.005257600049177805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,3584,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,128,0.003099733342727025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,128,0.014383999506632486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,3072,0.01286720037460327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,64,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,64,0.014492799838383993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,2560,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2560,32,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2560,32,0.014402133226394654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,65536,0.07433280150095621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,65536,0.06731626987457276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,16384,0.02287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,65536,0.19388799667358397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,16384,0.027846399943033857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,12288,0.01853760083516439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,1024,0.007681066791216533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,12288,0.024966400861740113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,10240,0.016432000199953715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,10240,0.023091200987497965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,8192,0.01395840048789978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,512,0.005881600081920624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,8192,0.02177706758181254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,7168,0.012612266341845193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,7168,0.022054400046666464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,128,0.004973866542180379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,7168,0.024324266115824382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,6144,0.011345066626866658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,6144,0.020932267109553017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,6144,0.020785067478815714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,5120,0.010468266407648722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2560,1536,0.008760533730189006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,5120,0.02012053330739339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,4096,0.009619200229644775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,4096,0.01943146586418152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,3584,0.009533866246541341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,3584,0.018734933932622273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,16384,0.04882026513417562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,3072,0.00846613347530365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,12288,0.03557013273239136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,3072,0.01726933320363363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,2560,0.007655466596285502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,2560,0.01697173317273458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,10240,0.03204906582832336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,2048,0.006785066425800323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,2048,0.01628373364607493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,8192,0.027667200565338133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,1536,0.0054517333706219995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,1536,0.015762133399645488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,1024,0.00456639975309372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,1024,0.01539520025253296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,5120,0.01837013363838196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,768,0.004106666644414266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,4096,0.015410133202870688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,768,0.015242666999499003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,512,0.0037119999527931214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,3584,0.014591999848683677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,512,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,512,0.005913599828879039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,256,0.0033141332368055976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,256,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,256,0.005222400029500326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,128,0.003091199944416682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,2560,0.011666133006413778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,128,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,64,0.0028629332780838014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,2048,0.009916800260543823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,64,0.014493866761525472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,2048,32,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,2048,32,0.014630400141080222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,65536,0.0597546656926473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,1536,0.008396800359090168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,65536,0.05910720030466715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,16384,0.018687999248504637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,16384,0.02509653369585673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,1024,0.0073183998465538025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,12288,0.014574933052062988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,12288,0.022894932826360067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,12288,0.03296213348706563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,10240,0.012821333607037863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,10240,0.0218122661113739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,8192,0.011034666498502096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,8192,0.0206496000289917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,7168,0.01036906639734904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,768,0.006460799773534138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,3072,0.01297706663608551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,7168,0.022170666853586832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,6144,0.009819733103116353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,6144,0.02133013407389323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,5120,0.009274666508038838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,2048,128,0.005063466727733612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,5120,0.018838399648666383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,5120,0.016338133811950685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,4096,0.008612266182899475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,4096,0.018513067563374837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,3584,0.008713600039482117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,3584,0.017783466974894205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,65536,0.15186986923217774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,16384,0.04177813529968262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,3072,0.009031466643015544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,3072,0.017041067282358803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,2560,0.006757333377997081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,10240,0.028645332654317218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,2560,0.016478932897249856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,8192,0.0245685338973999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,2048,0.005735466877619425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,2048,0.016004266341527303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,7168,0.023238400618235268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,2048,0.008781866232554118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,1536,0.004923733572165171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,1536,0.016127999623616537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,1024,0.004328533510367076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,1024,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,4096,0.014059733351071677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,768,0.003955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,768,0.015092266599337259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,3584,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,512,0.00359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,512,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,3072,0.011287466684977213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,2560,0.010083199540774027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,256,0.003239466746648153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,6144,0.02008533279101054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,256,0.014506666858990987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,256,0.005095466474692027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,128,0.003014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,128,0.014341333508491516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,1536,0.007686399916807811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,64,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,1024,0.00647680014371872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,64,0.014634666840235391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1536,32,0.0029290666182835894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1536,32,0.014338133732477823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,65536,0.04008959929148356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,65536,0.049157333374023435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,16384,0.014052266875902811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,65536,0.11722026666005451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,768,0.005842133363087972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,16384,0.023125332593917847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,12288,0.01157973309357961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,12288,0.02118399937947591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,512,0.0055189331372578945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,10240,0.010620799660682679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,10240,0.020346667369206747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,10240,0.0237610658009847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,8192,0.009739733735720317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,8192,0.020131200551986694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,7168,0.009313066800435383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,7168,0.02001813252766927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1536,128,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,6144,0.008830933769543966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,6144,0.01853760083516439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,5120,0.008391466736793519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,5120,0.01907520095507304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,4096,0.007294933497905731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,4096,0.0180949330329895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,4096,0.011409067114194234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,3584,0.007609599828720092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,3584,0.0179584006468455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,3584,0.010589866836865743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,3072,0.007518933216730754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,3072,0.017401599884033205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,16384,0.03524693250656128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,2560,0.0064533332983652755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,2560,0.01639253298441569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,2560,0.008872532844543457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,2048,0.0055744002262751256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,2048,0.00767573316891988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,2048,0.016140799721082053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,1536,0.004995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,8192,0.01981653372446696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,1536,0.016497066617012023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,1024,0.0042346666256586705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,1024,0.015511467059453329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,7168,0.017552000284194947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,768,0.0039061332742373147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,768,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,768,0.005737600227197012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,6144,0.01590720017751058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,512,0.01493333379427592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,512,0.0036149332920710243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,5120,0.012958932916323343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,256,0.003270400067170461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,256,0.014338133732477823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,128,0.002898133297761281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,128,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,128,0.004722133278846741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,64,0.002883200099070867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,64,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,12288,0.02511253356933594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,1024,32,0.002850133428970973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,1024,32,0.014495999614397685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,3072,0.009144533673922222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,65536,0.03487999836603801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,65536,0.04565013249715169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,16384,0.011331199606259664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,16384,0.0220960001150767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,12288,0.009699199597040813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,12288,0.02151040037473043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,1536,0.006972800195217133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,10240,0.009455999732017517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,10240,0.020434133211771646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,1024,0.006353066861629486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,8192,0.010849066575368245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,8192,0.02082560062408447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,8192,0.015999999642372132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,7168,0.009272533655166625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,512,0.005233066777388254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,7168,0.01941653291384379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,1024,256,0.004910933474699656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,6144,0.009155199925104777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,6144,0.019370667139689126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,6144,0.012711466352144877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,5120,0.00782293329636256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,5120,0.018891733884811402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,5120,0.011013333002726238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,4096,0.006883200009663899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,65536,0.1075914700826009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,4096,0.017997866868972777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,4096,0.009879466891288758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,16384,0.02985173265139262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,3584,0.007076266904671986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,3584,0.01761386593182882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,12288,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,3072,0.0066336000959078475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,3072,0.01704533298810323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,2560,0.006342400113741558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,3072,0.008258133133252462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,2560,0.016633599996566772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,2560,0.008101333181063335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,2048,0.005594666798909505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,2048,0.016167466839154564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,2048,0.006985599795977275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,1536,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,1536,0.01591679950555166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,1536,0.006578133503595989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,1024,0.004265599946180979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,1024,0.015386666854222616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,7168,0.014190933108329773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,1024,0.0058794667323430385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,768,0.0038965334494908653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,768,0.015333333611488342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,768,0.005494399865468343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,256,0.003102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,512,0.0036042665441830954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,512,0.014817066987355552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,512,0.005157333115736643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,256,0.014518400033315023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,256,0.004822400212287903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,128,0.0028746667007605235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,128,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,128,0.0046741331617037455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,64,0.002699733277161916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,3584,0.009402666489283245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,64,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,768,10240,0.018331732352574667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,768,32,0.0028522667785485585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,768,32,0.014230400323867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,65536,0.024833067258199056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,65536,0.04022080103556315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,65536,0.09321920077006021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,16384,0.010156800349553425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,16384,0.021476266781489055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,16384,0.025614933172861738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,12288,0.008974933624267578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,12288,0.02039360006650289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,10240,0.008586666981379191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,10240,0.01917546590169271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,8192,0.007998933394749958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,7168,0.01933120091756185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,8192,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,8192,0.01923946738243103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,7168,0.007645866771539052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,6144,0.006819200019041698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,6144,0.01869759956995646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,5120,0.00722453345855077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,5120,0.018617600202560425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,5120,0.010620799660682679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,4096,0.01813546617825826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,4096,0.007062399884064992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,3584,0.006741333504517872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,3584,0.01755626598993937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,3584,0.009107200304667155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,3072,0.006398933132489522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,3072,0.017197867234547935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,3072,0.008158933122952778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,2560,0.006212266782919565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,2560,0.016798933347066246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,12288,0.019581866264343262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,2048,0.005471999943256378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,2048,0.016260266304016113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,2048,0.007038933535416921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,10240,0.01716586748758952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,1536,0.004791466891765595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,1536,0.0162090669075648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,1536,0.006483200192451477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,1024,0.004137599964936575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,1024,0.015238400300343832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,768,0.00383786658445994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,7168,0.013226667046546936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,768,0.015218133727709452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,768,0.00521919975678126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,512,0.003499733408292135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,6144,0.011932800213495891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,512,0.014825600385665893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,256,0.003068800022204717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,256,0.014404267072677612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,256,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,4096,0.009430399537086487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,128,0.0028917332490285236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,128,0.014356266458829245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,128,0.004704000055789947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,64,0.002639999985694885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,64,0.014201600352923074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,512,32,0.0029109333952267963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,512,32,0.014105600118637086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,65536,0.016127999623616537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,65536,0.034747731685638425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,2560,0.007713066538174947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,65536,0.08566400210062662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,16384,0.024026666084925333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,16384,0.009790933132171631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,16384,0.02027946710586548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,12288,0.008222933113574981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,12288,0.01944640080134074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,12288,0.01916159987449646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,10240,0.007358933488527934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,10240,0.019834667444229126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,10240,0.016822399695714314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,8192,0.0067007998625437425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,8192,0.018633600076039633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,8192,0.014409599701563516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,7168,0.013255467017491659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,7168,0.006393600006898243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,512,0.004982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,7168,0.018780799706776936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,6144,0.006197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,6144,0.018616533279418944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,5120,0.006562133133411407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,5120,0.010548266768455505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,5120,0.018655999501546224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,4096,0.006164266665776571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,4096,0.017808000246683754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,4096,0.009439999858538311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,3584,0.00654720018307368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,3584,0.017884800831476845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,3584,0.008982400099436443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,3072,0.006209066510200501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,3072,0.01685333251953125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,2560,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,512,1024,0.005849599838256836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,2048,0.01592853367328644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,2560,0.01627626617749532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,2048,0.005401599903901418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,1536,0.004811733464399974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,1536,0.015759999553362526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,1024,0.004009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,1024,0.015593600273132325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,768,0.0037205333511034647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,768,0.005310933291912079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,768,0.015100799997647605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,512,0.0033344000577926634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,6144,0.011714133620262145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,512,0.005036800106366476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,512,0.015422933300336204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,256,0.0030741333961486817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,256,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,256,0.004799999793370565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,128,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,128,0.014246400197347006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,64,0.002812800059715907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,64,0.014301866292953491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,3072,0.008227199812730153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,256,32,0.0026687999566396077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,256,32,0.0143477330605189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,65536,0.012202666203180949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,2560,0.00769706666469574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,65536,0.0313920001188914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,65536,0.0856117328008016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,16384,0.006896000107129414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,2048,0.006954666475454967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,16384,0.019462400674819948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,12288,0.0063498665889104204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,1536,0.006520533561706543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,12288,0.01913493275642395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,12288,0.019350399573644005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,10240,0.006493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,1024,0.005577600002288819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,10240,0.019604265689849854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,10240,0.01697280009587606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,8192,0.006467199822266896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,8192,0.018294399976730345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,7168,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,7168,0.01840959986050924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,7168,0.013267200191815695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,6144,0.006179200112819671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,6144,0.018161066373189292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,5120,0.0065098668138186145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,5120,0.018412800629933675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,4096,0.0060127998391787205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,4096,0.017453867197036742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,3584,0.006482133269309997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,3584,0.017798399925231932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,3584,0.008950400352478027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,3072,0.006086400151252747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,3072,0.01679253379503886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,16384,0.02446613311767578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,2560,0.006053333481152853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,2560,0.016404267152150473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,8192,0.014478933811187745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,2048,0.005364266534646353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,2048,0.015875200430552162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,256,128,0.004715733230113983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,2048,0.006817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,1536,0.004724266628424326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,1536,0.01614400049050649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,6144,0.011757866541544596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,1536,0.00664106657107671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,1024,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,1024,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,1024,0.005932799975077311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,5120,0.010601600011189777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,768,0.0037589333951473236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,768,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,4096,0.009427199761072796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,512,0.0033269333342711128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,512,0.01456000010172526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,512,0.005015466610590617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,256,0.003052799900372823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,256,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,256,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,128,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,128,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,64,0.002700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,64,0.014215466380119324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,2560,0.007804800073305767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,128,32,0.002690133452415466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,128,32,0.014297599593798319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,65536,0.009303466478983561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,65536,0.027948800722757978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,16384,0.0062837332487106325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,16384,0.01986133257548014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,12288,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,12288,0.019011199474334717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,10240,0.0063285330931345625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,10240,0.018987733125686645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,8192,0.006180266539255777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,3072,0.008024533092975617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,8192,0.018556799491246542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,7168,0.006064000229040781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,7168,0.018634666999181114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,6144,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,6144,0.01862293283144633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,5120,0.00636053333679835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,5120,0.018458666404088338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,4096,0.005890133480230967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,4096,0.01760639945665995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,3584,0.006523733337720235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,3584,0.017498666048049928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,3072,0.006128000219662985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,3072,0.016698666413625083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,2560,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,2560,0.016614400347073875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,2048,0.005434666574001312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,128,0.004555733501911163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,2048,0.01578986644744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,1536,0.0046165332198143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,1536,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,1024,0.00402453343073527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,1024,0.015156267086664834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,768,0.0037109332780043284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,768,0.015035733580589294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,512,0.0033312000334262846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,128,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,512,0.014647466937700906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,256,0.002921599894762039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,256,0.014382933576901754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,128,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,64,0.0026176000634829206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,80,128,768,0.0053962667783101406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,64,0.014134400089581809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,64,32,0.002842666705449422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,12288,0.006230400005976359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,64,32,0.014122666915257773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,65536,0.011170132954915365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,65536,0.02827306588490804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,16384,0.006361599763234456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,16384,0.019978666305541994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,12288,0.01914880077044169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,10240,0.006467199822266896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,10240,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,8192,0.0062496001521746315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,8192,0.018462934096654258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,7168,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,7168,0.018870399395624796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,6144,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,6144,0.018207999070485432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,5120,0.006322133541107178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,5120,0.018659200270970663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,4096,0.0058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,4096,0.01758079926172892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,3584,0.006343466540177663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,3584,0.017139200369517008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,3072,0.0060032000144322716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,3072,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,2560,0.006234666705131531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,2560,0.016214399536450704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,2048,0.0054175997773806255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,1024,0.015502933661142984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,2048,0.015774933497111003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,1536,0.004655999938646952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,1536,0.015508266290028891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,1024,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,768,0.0036757332583268487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,512,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,768,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,512,0.014647466937700906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,256,0.002921599894762039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,256,0.014223999778429666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,128,0.0027989332874615988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,128,0.014345600207646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,64,0.00266239990790685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,64,0.014315733313560485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,80,32,32,0.0026634665826956432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,80,32,32,0.014135467012723288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,16384,0.30149758656819664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,16384,0.5664693196614583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,12288,0.23127573331197104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,12288,0.4287295977274577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,10240,0.3573301315307617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,10240,0.19772052764892578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,8192,0.28929707209269206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,8192,0.17216639518737792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,7168,0.25382186571757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,7168,0.14307093620300293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,6144,0.21765119234720864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,6144,0.13403093020121257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,5120,0.18312427202860515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,5120,0.10689386526743572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,4096,0.14855893452962238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,4096,0.08877120018005372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,12288,0.19836799303690594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,10240,0.18130346934000652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,3584,0.12976213296254474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,3584,0.08790079752604166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,8192,0.13841279347737628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,7168,0.12606720129648846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,3072,0.11272106965382893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,3072,0.07340266704559326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,2560,0.09437653223673502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,6144,0.1047317345937093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,2560,0.06203840176264445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,2048,0.07619520028432211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,2048,0.054382932186126706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,5120,0.08947626749674478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,1536,0.05927573442459107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,1536,0.04788693189620972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,1024,0.04174186786015828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,4096,0.07296853065490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,1024,0.03591786623001099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,768,0.03221759994824727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,768,0.03138239979743958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,16384,0.2728543917338053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,512,0.02600640058517456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,512,0.025935999552408856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,3072,0.057309865951538086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,768,0.02196906606356303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,256,0.014539733529090881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,256,0.02054826617240906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,2048,0.04041066567103068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,128,0.008642133076985676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,128,0.01880853374799093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,64,0.0071946665644645694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,1024,0.027001599470774334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,64,0.017805866400400796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,65536,32,0.006153599917888641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,65536,32,0.018042665719985963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,65536,0.2914570808410645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,65536,0.576909891764323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,3584,0.06745493412017822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,2560,0.04901119867960612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,16384,0.15916479428609212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,16384,0.08548053105672201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,12288,0.09712533156077066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,12288,0.06825173695882161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,10240,0.0823263963063558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,10240,0.05945279995600382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,128,0.011913599570592244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,256,0.01381653348604838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,8192,0.06661760012308757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,8192,0.0511242667833964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,1536,0.03317226568857829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,7168,0.05845973491668701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,7168,0.046419199307759604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,6144,0.05085440079371134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,6144,0.04249386787414551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,65536,512,0.017579734325408936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,5120,0.04482986529668172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,5120,0.03918399810791016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,16384,0.07284800211588541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,65536,0.2686346689860026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,4096,0.03595199982325236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,4096,0.035224533081054686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,12288,0.057378133138020836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,3584,0.03151253263155619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,3584,0.0319925328095754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,10240,0.047787733872731525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,3072,0.027873067061106364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,3072,0.028907734155654907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,8192,0.039724798997243245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,2560,0.023963733514149984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,2560,0.026756266752878826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,7168,0.035631998380025225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,2048,0.019778132438659668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,2048,0.024014933904012045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,6144,0.031447466214497885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,1536,0.016051200032234193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,1536,0.02133013407389323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,1024,0.011282133062680562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,1024,0.019244800011316933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,5120,0.02832213242848714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,768,0.009846400221188862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,768,0.01818880041440328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,4096,0.023539199431737264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,512,0.007896533111731212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,512,0.016618667046229045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,3072,0.019322667519251505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,256,0.004589866598447164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,256,0.01541759967803955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,3584,0.02180373271306356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,128,0.004088533421357473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,128,0.015250133474667868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,2048,0.014507733782132468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,64,0.0037216000258922578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,64,0.015388799707094827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,16384,32,0.0037685332198937735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,1024,0.010193066795667012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,16384,32,0.015524267156918844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,768,0.009321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,65536,0.40338560740152996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,65536,0.23239466349283852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,16384,0.11865066687266032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,16384,0.06909120082855225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,2560,0.01732586622238159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,12288,0.07331519921620687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,12288,0.05347626606623331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,10240,0.06246826648712158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,512,0.007405866682529449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,10240,0.047842133045196536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,8192,0.05366186698277792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,8192,0.04170986811319987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,256,0.006155733267466227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,7168,0.04674559831619263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,7168,0.03803306818008423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,128,0.005804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,6144,0.039061331748962404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,6144,0.03500906626383464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,16384,1536,0.01241386632124583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,5120,0.034789331754048664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,5120,0.031649067004521685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,12288,0.043935998280843096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,4096,0.029047467311223346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,4096,0.02829440037409465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,65536,0.21291839281717934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,3584,0.02756800055503845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,16384,0.056222931543986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,3584,0.02686506708463033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,3072,0.022504534324010214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,3072,0.024521599213282265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,10240,0.0377781351407369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,2560,0.01955519914627075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,2560,0.023104000091552734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,6144,0.025064533948898314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,2048,0.01594986617565155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,2048,0.021409066518147786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,7168,0.028490666548411054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,8192,0.031506133079528806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,1536,0.012949333588282267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,1536,0.020143999656041463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,1024,0.009877333045005798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,1024,0.018180267016092936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,5120,0.022272000710169472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,768,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,768,0.016940800348917644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,3072,0.015629866719245912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,512,0.006615466872851054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,4096,0.01913493275642395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,512,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,256,0.004166399935881296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,256,0.01534293293952942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,1536,0.010351999600728353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,2560,0.013754666845003764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,128,0.003740799923737844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,3584,0.017485866943995156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,128,0.014769066373507181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,64,0.0034400001168251038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,64,0.015009066462516785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,12288,32,0.003398400048414866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,12288,32,0.01523413360118866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,65536,0.33840853373209634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,65536,0.20078825950622559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,2048,0.011964799960454305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,1024,0.008782933155695598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,16384,0.09544106324513754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,16384,0.05785813331604004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,12288,0.06150720119476318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,12288,0.04750399986902873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,768,0.007612800101439159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,10240,0.055927467346191403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,10240,0.04229226509730021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,512,0.005914666752020518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,8192,0.04253546794255574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,128,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,8192,0.037512532869974774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,7168,0.037766400973002115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,7168,0.03458133141199748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,6144,0.03296746611595154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,6144,0.03198080062866211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,12288,256,0.005380266904830932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,5120,0.028518400589625043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,5120,0.02972480058670044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,12288,0.037994666894276934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,4096,0.023753599325815836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,65536,0.17688533465067546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,16384,0.04874560038248698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,4096,0.027129600445429485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,3584,0.0211136003335317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,3584,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,3072,0.018731733163197838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,3072,0.023534933725992836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,10240,0.03323626716931661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,2560,0.016784000396728515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,2560,0.021949867407480873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,6144,0.02193066676457723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,2048,0.014603733023007711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,7168,0.02492693265279134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,2048,0.020487467447916664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,1536,0.011300266782442728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,8192,0.027799467245737713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,1536,0.019358932971954346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,1024,0.008740267157554627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,1024,0.01753386656443278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,5120,0.019741866985956827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,768,0.007401599983374278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,768,0.01592853367328644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,3072,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,512,0.0058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,512,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,4096,0.01767680048942566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,256,0.0037600000699361167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,256,0.015016532937685647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,3584,0.014985600113868713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,128,0.0034602666894594826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,128,0.014643200238545737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,2560,0.012282666563987733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,64,0.0030933332939942675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,64,0.014873600006103516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,10240,32,0.0031541332602500914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,10240,32,0.014910933375358582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,1536,0.009436800082524618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,65536,0.2661365350087484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,65536,0.16359574000040691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,16384,0.07107199827829996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,16384,0.05221333503723145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,2048,0.010633599758148194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,1024,0.008057599763075511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,12288,0.055447467168172206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,12288,0.042260265350341795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,10240,0.043911465009053546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,10240,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,768,0.006171733140945435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,8192,0.03593920071919759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,256,0.0052255998055140175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,512,0.005658666789531708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,8192,0.033318400382995605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,7168,0.0328981339931488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,10240,128,0.004951466619968414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,7168,0.030697600046793623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,6144,0.028596266110738115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,6144,0.02844799955685933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,5120,0.023622399568557738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,5120,0.027053866783777875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,65536,0.14954667091369628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,4096,0.02217280069986979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,16384,0.041066666444142655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,4096,0.024330667654673257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,3584,0.018441599607467652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,12288,0.03209386666615804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,3584,0.022776534159978233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,3072,0.016455466548601784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,10240,0.027805866797765096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,3072,0.021990400552749634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,2560,0.014308266838391624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,2560,0.02044586737950643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,8192,0.02312320073445638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,7168,0.020968532562255858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,2048,0.01202453374862671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,2048,0.020193066199620566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,6144,0.018976000944773357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,1536,0.009512533744176228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,1536,0.01853546698888143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,1024,0.0077237332860628765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,5120,0.0168778657913208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,1024,0.01595093309879303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,768,0.006489600241184235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,4096,0.014282666643460593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,768,0.015828266739845276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,3072,0.011969066659609477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,512,0.004610133171081543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,512,0.015404799580574035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,256,0.003819733361403147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,256,0.014866133530934652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,3584,0.013563733299573263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,2048,0.009833600123723347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,128,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,128,0.014855466286341348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,64,0.0031914666295051576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,1024,0.006712533533573151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,2560,0.01097920040289561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,64,0.015083733201026916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,8192,32,0.0033557333052158357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,8192,32,0.014958932995796204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,65536,0.24921065966288247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,65536,0.15098986625671387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,16384,0.06916159788767497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,16384,0.04720960060755412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,1536,0.008819199601809184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,12288,0.0513589342435201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,12288,0.03915733496348063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,10240,0.03933013280232747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,10240,0.035283199946085614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,512,0.005658666789531708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,256,0.0053151999910672505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,768,0.006080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,8192,0.031600000460942586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,8192,128,0.005014400184154511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,8192,0.03141013383865356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,7168,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,7168,0.029250133037567138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,6144,0.024588799476623534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,6144,0.027154133717219038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,5120,0.021245867013931274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,5120,0.025041067600250246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,12288,0.03051626682281494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,65536,0.13595306078592936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,4096,0.018579200903574625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,4096,0.02345386743545532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,16384,0.038152531782786055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,3584,0.016038399934768677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,3584,0.022012799978256226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,3072,0.014099199573198953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,3072,0.021093332767486574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,10240,0.026475733518600462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,2560,0.012626133362452188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,2560,0.020134399334589638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,6144,0.017847466468811034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,2048,0.01060693363348643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,2048,0.019927465915679933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,7168,0.02025066614151001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,8192,0.022427733739217123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,1536,0.00899733304977417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,1536,0.018702934185663857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,1024,0.0071285332242647815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,1024,0.015521066387494406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,5120,0.015796266992886863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,768,0.006152533491452535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,768,0.015829333662986757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,4096,0.013377066453297934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,3072,0.011397332946459452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,512,0.004260266820589701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,512,0.015144532918930054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,256,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,3584,0.01229866643746694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,256,0.014941866199175516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,128,0.0033728001018365227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,2560,0.010500267148017883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,128,0.014667733510335287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,64,0.0032330666979153953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,64,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,7168,32,0.0033258666594823206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,7168,32,0.014922666549682616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,1536,0.008350933591524761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,65536,0.20164586702982584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,65536,0.1288863976796468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,16384,0.058397865295410155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,2048,0.009367466966311137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,16384,0.04322559833526611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,1024,0.00621013343334198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,12288,0.042778666814168295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,12288,0.036608000596364335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,10240,0.03888533512751262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,768,0.005576533575852713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,10240,0.0334175984064738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,8192,0.02876053253809611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,8192,0.029716267188390093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,512,0.005393066505591074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,256,0.0048981333772341405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,7168,128,0.004792533318201701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,7168,0.024934399127960204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,7168,0.02779200077056885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,6144,0.022190932432810465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,6144,0.025364265839258833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,5120,0.019322667519251505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,5120,0.023434666792551677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,65536,0.12362879912058514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,12288,0.028207999467849732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,16384,0.035331201553344724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,4096,0.016667733589808144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,4096,0.02148159941037496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,3584,0.014134400089581809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,3584,0.020756266514460244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,10240,0.02477226654688517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,3072,0.01297920048236847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,3072,0.020514132579167683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,8192,0.020757333437601725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,2560,0.011294933160146077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,6144,0.016455466548601784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,2560,0.019592533508936562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,7168,0.018627200524012247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,2048,0.009821866949399311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,2048,0.018500266472498576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,1536,0.008182399968306223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,1536,0.016641066471735636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,1024,0.006546133259932201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,1024,0.015581867098808289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,5120,0.014292266964912415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,768,0.005425066749254862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,768,0.015476266543070475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,4096,0.012418133020401002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,512,0.0040832000474135075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,512,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,3072,0.010553600390752156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,3584,0.011528533697128297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,256,0.003659733384847641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,256,0.014817066987355552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,128,0.003373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,128,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,1536,0.00714026689529419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,2560,0.009849599997202555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,64,0.0030730667213598887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,64,0.01479039986928304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,6144,32,0.0031786667803923286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,6144,32,0.014787200093269347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,2048,0.008820266524950663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,65536,0.1766101360321045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,65536,0.1174623966217041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,16384,0.05343146721522013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,16384,0.04276053508122762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,1024,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,12288,0.039025068283081055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,768,0.00537066658337911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,12288,0.03368213176727295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,10240,0.033524266878763836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,10240,0.030316799879074097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,8192,0.028648533423741657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,8192,0.027511467536290485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,512,0.005110399921735128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,256,0.00486826648314794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,7168,0.021637332439422608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,6144,128,0.004636799792448679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,7168,0.02541653315226237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,6144,0.019082667430241902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,6144,0.023618133862813313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,5120,0.016670932372411094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,5120,0.022053333123524983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,12288,0.02614826758702596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,65536,0.11488640308380127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,16384,0.03306453426678975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,4096,0.014131200313568116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,4096,0.0207914670308431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,3584,0.012180266777674358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,3584,0.02014933427174886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,3072,0.01104960044225057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,10240,0.022884267568588256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,3072,0.01960960030555725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,2560,0.009734400113423665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,2560,0.01879146695137024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,8192,0.019338667392730713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,2048,0.008405333757400513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,2048,0.017861332496007284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,6144,0.015063466628392539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,1536,0.007266133526961009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,7168,0.01741866668065389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,1536,0.01616426706314087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,1024,0.006062933305899302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,1024,0.015706666310628257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,5120,0.013596799969673157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,768,0.004448000093301137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,768,0.015491200486818948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,3072,0.01009493370850881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,4096,0.011796266833941142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,512,0.003700266778469086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,512,0.014929067095120749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,256,0.00346666673819224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,3584,0.011168000102043153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,256,0.014678399761517844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,128,0.0031957333286603295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,2560,0.009373866518338521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,128,0.014834133783976236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,64,0.0029909332593282064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,64,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,5120,32,0.003019733230272929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,1536,0.006666666766007741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,5120,32,0.014673067132631936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,65536,0.14082026481628418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,65536,0.0952128012975057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,2048,0.008454400300979614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,1024,0.005740800003210703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,16384,0.039206401507059736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,16384,0.03762986660003662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,12288,0.03096533417701721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,12288,0.030300800005594892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,768,0.005490133166313171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,10240,0.027774933973948163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,10240,0.027644799153010054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,512,0.005142400165398916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,256,0.004856533308823904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,8192,0.023477333784103393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,8192,0.025298132499059038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,5120,128,0.004587733248869578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,7168,0.018613332509994508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,7168,0.02327466607093811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,6144,0.01616320013999939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,6144,0.022409600019454957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,5120,0.014061866203943887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,5120,0.021052799622217813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,65536,0.0929248015085856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,4096,0.01295360028743744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,16384,0.02727893392244975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,4096,0.019767467180887857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,3584,0.010584533214569092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,12288,0.02195840080579122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,3584,0.019195733467737834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,3072,0.009765332937240601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,3072,0.01881813406944275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,10240,0.01918506622314453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,2560,0.008504533767700195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,2560,0.017824000120162962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,8192,0.016201600432395935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,2048,0.007639466722806294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,7168,0.014574933052062988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,2048,0.01643946667512258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,1536,0.006660266717274983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,6144,0.01300373375415802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,1536,0.01609813372294108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,1024,0.004874666531880697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,1024,0.015408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,5120,0.011750400066375732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,768,0.004114133367935816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,768,0.015246933698654175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,4096,0.010664533575375874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,512,0.0037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,512,0.014878933628400167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,3072,0.00925439993540446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,256,0.0032543999453385672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,3584,0.01001706620057424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,256,0.014467199643452963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,128,0.0030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,128,0.014260266224543253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,2560,0.008897067109743754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,64,0.0029877332349618276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,2048,0.0070698668559392285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,64,0.014432000120480857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,4096,32,0.002951466788848241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,4096,32,0.014547200004259745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,1024,0.005542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,65536,0.1251456022262573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,65536,0.08607786496480306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,16384,0.04000213146209717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,1536,0.006472533444563548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,16384,0.03488959868748982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,768,0.005286400020122528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,12288,0.02757226626078288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,12288,0.030134399731953938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,10240,0.024627200762430825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,10240,0.02741760015487671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,8192,0.020846933126449585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,8192,0.025805866718292235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,512,0.005120000243186951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,256,0.004823466638724009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,7168,0.01897066632906596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,7168,0.023811199267705283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,4096,128,0.004713599880536398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,6144,0.01665386656920115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,6144,0.021581866343816123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,5120,0.0150709331035614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,5120,0.020786132415135702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,65536,0.08915092945098876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,4096,0.011932800213495891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,16384,0.02622293432553609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,4096,0.019522132476170857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,12288,0.021163733800252278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,3584,0.009853866696357728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,3584,0.018927999337514243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,10240,0.018310399850209554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,3072,0.009125333031018574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,3072,0.01847040057182312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,2560,0.008167466521263123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,8192,0.015422933300336204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,2560,0.01758506695429484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,2048,0.007246933380762736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,2048,0.016754132509231568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,7168,0.014014933506647745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,6144,0.012680533528327941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,1536,0.006397866706053417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,1536,0.04880106846491496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,5120,0.011410133043924967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,1024,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,1024,0.01535360018412272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,4096,0.010198400417963664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,768,0.004073599974314371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,768,0.015260799725850423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,512,0.0037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,512,0.01493333379427592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,3584,0.009865599870681762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,256,0.0033642667035261786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,2048,0.0068896000583966565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,256,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,128,0.002996266633272171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,2560,0.008441600203514098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,128,0.01437333325544993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,64,0.002828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,64,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3584,32,0.002935466667016347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3584,32,0.014619732896486918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,3072,0.009052800138791402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,1536,0.006381866832574208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,65536,0.10637439886728924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,65536,0.07823466459910075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,16384,0.033958399295806886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,16384,0.03298880060513814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,12288,0.024718934297561647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,12288,0.027767467498779296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,1024,0.005618133147557576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,10240,0.021569067239761354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,10240,0.025396267573038738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,768,0.005321600039800008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,8192,0.018857600291570027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,512,0.005070933202902476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,8192,0.0234442671140035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,256,0.004886400202910105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,7168,0.01676373283068339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3584,128,0.004642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,7168,0.022181334098180135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,6144,0.01495146652062734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,6144,0.02145386735598246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,5120,0.013252266248067222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,5120,0.020441599686940513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,65536,0.09878506660461425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,12288,0.021970132986704506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,16384,0.027990400791168213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,4096,0.010486400127410889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,4096,0.019858133792877198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,3584,0.009315199653307597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,3584,0.019099734226862588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,3072,0.00869866708914439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,3072,0.017782400051752724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,10240,0.019076265891393027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,2560,0.007830399771531422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,2560,0.01693120002746582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,6144,0.013041067123413085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,8192,0.016463999946912132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,2048,0.007124266525109608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,7168,0.014661332964897156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,2048,0.016718933979670204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,1536,0.0060810665289560955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,1536,0.01546346644560496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,1024,0.004298666616280874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,1024,0.015461333592732749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,5120,0.011917866269747416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,768,0.004012800008058548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,768,0.015253333250681558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,4096,0.01058240036169688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,3072,0.008754133184750875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,512,0.004443733394145966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,512,0.015212800105412802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,3584,0.009920000036557516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,256,0.0033301333586374915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,256,0.014629333217938741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,2560,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,128,0.0030239999294281008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,128,0.014212266604105631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,64,0.0029002666473388673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,64,0.014328533411026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,3072,32,0.0028629332780838014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,3072,32,0.014355199535687766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,1536,0.006188799937566122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,65536,0.09280746777852376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,2048,0.006844800213972728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,65536,0.07287253538767496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,16384,0.02701759934425354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,1024,0.005649066468079885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,16384,0.029814400275548297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,12288,0.021496532360712688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,12288,0.025570134321848553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,10240,0.018863999843597413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,768,0.00517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,10240,0.023721599578857423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,8192,0.01613653302192688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,8192,0.02255679965019226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,512,0.004974933465321859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,256,0.004794666667779287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,7168,0.014556800325711569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,7168,0.02151893377304077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,6144,0.013194666306177775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,3072,128,0.004642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,6144,0.020890667041142782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,12288,0.021466666460037233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,65536,0.09555839697519938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,5120,0.011662933230400085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,16384,0.0273418664932251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,5120,0.019745065768559774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,4096,0.009959466258684794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,4096,0.01973653237024943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,3584,0.009262933333714803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,10240,0.018696532646814982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,3584,0.017786665757497152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,3072,0.008388266960779826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,3072,0.01718719998995463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,2560,0.00767680009206136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,2560,0.01681386629740397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,8192,0.016056533654530844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,2048,0.006940799951553345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,2048,0.01628373364607493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,7168,0.014407466848691305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,1536,0.005570133527119955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,6144,0.01288746694723765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,1536,0.01607253352801005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,1024,0.004355200131734212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,1024,0.015307733416557312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,4096,0.010376532872517902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,3072,0.008110933502515157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,5120,0.01172693371772766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,768,0.003957333415746689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,768,0.015109333395957946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,512,0.0036778666079044344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,3584,0.009152000149091084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,512,0.014877866705258688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,256,0.0033557333052158357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,2560,0.007516799867153168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,256,0.01448853313922882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,128,0.0030389333764712016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,128,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,1536,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,64,0.002792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,64,0.014533332983652749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2560,32,0.002916266769170761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2560,32,0.01448853313922882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,65536,0.07266240119934082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,2048,0.006785066425800323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,65536,0.06298986673355103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,1024,0.005610666672388713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,16384,0.022357332706451415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,16384,0.027010132869084675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,12288,0.018156800667444864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,12288,0.02416426738103231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,10240,0.01581546664237976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,10240,0.022899200518925987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,768,0.005498666564623515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,512,0.005049600203831991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,256,0.004753066599369049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2560,128,0.004653866589069367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,8192,0.013336533308029174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,8192,0.021095466613769532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,7168,0.0127210666735967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,7168,0.02045546571413676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,6144,0.011437867085138958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,6144,0.0198634664217631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,5120,0.010401067137718201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,5120,0.020012799898783365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,65536,0.08164052963256836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,16384,0.023769599199295045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,4096,0.009324799974759419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,12288,0.01881386637687683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,4096,0.01761386593182882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,3584,0.008719999591509502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,3584,0.017166932423909508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,10240,0.016796799500783284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,3072,0.008222933113574981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,3072,0.017094399531682333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,2560,0.007396266857783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,2560,0.016823466618855795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,8192,0.014518400033315023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,2048,0.006088533500830332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,2048,0.01625279982884725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,7168,0.013132799665133157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,6144,0.011832533280054729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,1536,0.005060266455014547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,1536,0.015711999932924905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,1024,0.004312533140182495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,5120,0.010859733819961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,1024,0.015833600362141927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,768,0.004004266609748204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,768,0.015361066659291586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,4096,0.009250133236249288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,512,0.00360000009338061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,512,0.01515733301639557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,3072,0.007749333480993907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,256,0.003197866678237915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,3584,0.008615466952323913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,256,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,128,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,128,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,2560,0.007208533088366191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,64,0.002829866607983907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,2048,0.006603733201821645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,64,0.014324266711870828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,2048,32,0.0028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,2048,32,0.014328533411026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,1024,0.005401599903901418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,65536,0.05767999887466431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,1536,0.006002133091290792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,65536,0.05464853445688883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,768,0.005252266426881155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,16384,0.018891733884811402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,16384,0.024977066119511924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,12288,0.014820266763369241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,12288,0.021627734104792275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,10240,0.012723199526468911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,10240,0.021476266781489055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,512,0.00494400014479955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,8192,0.011009066303571065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,8192,0.02061226765314738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,256,0.0047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,7168,0.010390399893124899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,2048,128,0.004564266900221507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,7168,0.01986453334490458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,6144,0.009643733501434326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,6144,0.01965120037396749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,5120,0.009358933568000794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,5120,0.018949333826700845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,65536,0.08080639839172363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,16384,0.023086933294932048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,4096,0.008582400282224019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,4096,0.018172800540924072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,12288,0.01888426740964254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,3584,0.008205866813659668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,3584,0.01771199901898702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,3072,0.007666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,10240,0.016739199558893837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,3072,0.017067732413609822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,2560,0.006659199794133504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,8192,0.014388266205787658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,2560,0.016410666704177856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,2048,0.005602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,7168,0.012963199615478515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,2048,0.016150400042533875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,6144,0.011707733074824016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,1536,0.00490880012512207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,1536,0.01593706707159678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,1024,0.004153600086768469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,5120,0.01027733286221822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,1024,0.01553813318411509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,768,0.003910399973392487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,768,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,4096,0.008709333340326945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,512,0.003570133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,512,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,3072,0.007614933451016744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,3584,0.008393599589665731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,256,0.003156266609827677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,256,0.014615466197331747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,128,0.002882133424282074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,2048,0.006491733094056447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,128,0.014295466740926108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,2560,0.00719893326361974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,64,0.002850133428970973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,64,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1536,32,0.0028789333999156954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1536,32,0.014516266187032065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,65536,0.03893119891484578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,1024,0.005334400137265523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,65536,0.045459198951721194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,1536,0.006028800209363302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,768,0.005166933437188466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,16384,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,16384,0.022884267568588256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,12288,0.01172693371772766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,12288,0.020610133806864418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,10240,0.010686933000882467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,10240,0.02001813252766927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,8192,0.009733333190282186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,8192,0.018857600291570027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,512,0.004920533299446106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,7168,0.009139200051625569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,256,0.004716800153255462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,7168,0.019402666886647543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1536,128,0.004479999840259552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,6144,0.008739200234413148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,6144,0.01836693286895752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,5120,0.008155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,5120,0.018782933553059898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,16384,0.023031467199325563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,4096,0.007202133536338806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,12288,0.018601600329081217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,4096,0.01808639963467916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,3584,0.007192533214886982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,3584,0.017511467138926186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,10240,0.01644159952799479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,3072,0.006734933455785115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,3072,0.01692053278287252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,65536,0.07980480194091796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,2560,0.0062837332487106325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,7168,0.012265599767367045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,2560,0.01635840038458506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,6144,0.01090773344039917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,2048,0.005421866476535797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,2048,0.016229333480199178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,5120,0.009943466385205586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,1536,0.0047647997736930845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,1536,0.01581653356552124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,8192,0.013707733154296875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,1024,0.004138666639725367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,1024,0.015265066425005594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,4096,0.008701866865158081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,768,0.003925333420435587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,768,0.015346133708953857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,3584,0.008278400202592214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,512,0.0034773332377274835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,512,0.014877866705258688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,256,0.0032159999012947083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,256,0.014331733187039694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,3072,0.007521066566308339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,128,0.003067733347415924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,128,0.014221866925557455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,2048,0.006312533219655354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,2560,0.0071829333901405334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,64,0.0027327999472618104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,64,0.014332800110181173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,1024,32,0.002902399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,1024,32,0.014387200276056925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,65536,0.032560000816981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,65536,0.04187519947687785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,1536,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,16384,0.011296000083287556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,1024,0.00525546669960022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,16384,0.021016534169514975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,12288,0.009959466258684794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,12288,0.019730132818222047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,10240,0.009059199690818786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,10240,0.01957119901974996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,768,0.005018666883309682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,8192,0.008355200290679932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,8192,0.019191465775171914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,512,0.004735999802748362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,7168,0.008081066608428954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,256,0.004637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,7168,0.019100799163182577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,6144,0.00738560010989507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,1024,128,0.004427733520666758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,6144,0.018453333775202432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,5120,0.007208533088366191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,5120,0.018524799744288126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,65536,0.08020479679107666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,4096,0.00647573322057724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,16384,0.0230293333530426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,4096,0.017573332786560057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,12288,0.018438400824864705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,3584,0.006820266445477803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,3584,0.017008000612258913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,3072,0.006353066861629486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,3072,0.016729599237442015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,10240,0.016005333264668783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,8192,0.013442132870356241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,2560,0.006200533111890157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,2560,0.017065600554148356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,2048,0.005604266623655955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,2048,0.016125866770744325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,7168,0.012038399775822956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,1536,0.004786133269468943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,1536,0.015667200088500977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,6144,0.010825600226720173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,1024,0.00413973331451416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,5120,0.009635200103123982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,1024,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,768,0.0037600000699361167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,768,0.015770666797955833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,4096,0.008614400029182434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,512,0.0033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,512,0.014698666334152222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,3072,0.007421866556008657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,3584,0.008229333162307739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,256,0.003124266614516576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,256,0.01432960033416748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,128,0.002932266642649968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,128,0.014250666896502177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,2048,0.006450133522351582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,2560,0.007222400108973186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,64,0.0027434666951497394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,64,0.014203733205795288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,768,32,0.0027797333896160126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,768,32,0.01418773333231608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,65536,0.023860265811284385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,1024,0.005371733506520589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,65536,0.036457598209381104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,1536,0.006049066781997681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,16384,0.009763200084368389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,16384,0.019950934251149497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,768,0.004995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,12288,0.008694400389989216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,12288,0.0185589333375295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,10240,0.008277333279450735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,10240,0.018972800175348917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,8192,0.00727893312772115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,8192,0.018425599733988444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,512,0.004753066599369049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,256,0.004612266520659129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,7168,0.006814933319886525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,768,128,0.004412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,7168,0.018662399053573607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,6144,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,6144,0.018348799149195353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,65536,0.07972266674041747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,5120,0.0065984000762303666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,16384,0.02267626722653707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,5120,0.018886399269104005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,4096,0.006177066763242086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,12288,0.017837866147359212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,4096,0.01778879960378011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,3584,0.0065738668044408154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,3584,0.01725013256072998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,3072,0.006355200211207073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,3072,0.016643200318018594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,10240,0.015587199727694193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,2560,0.006064000229040781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,2560,0.01636799971262614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,8192,0.013352533181508383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,2048,0.005423999826113383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,2048,0.015940266847610473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,7168,0.011960533261299134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,1536,0.004782933493455251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,1536,0.015718400478363037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,6144,0.010689066847165425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,1024,0.004043733328580856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,1024,0.015066666404406228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,5120,0.009752532839775086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,768,0.0038015998899936674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,768,0.015140266219774882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,4096,0.008566400408744812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,512,0.0033845332761605583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,512,0.014684800306955972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,3072,0.007522133489449819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,3584,0.008212266862392426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,256,0.0030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,256,0.01430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,128,0.002915200094381968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,128,0.01418346663316091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,2560,0.00705813318490982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,64,0.00271573339899381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,64,0.0141184002161026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,512,32,0.002726399898529053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,2048,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,512,32,0.014007467031478881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,65536,0.01518186628818512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,65536,0.030434133609135945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,16384,0.00763733337322871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,16384,0.019433599710464478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,1536,0.005904000004132589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,1024,0.005182933310667674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,12288,0.006628266473611195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,12288,0.018985599279403687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,10240,0.006538666784763336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,10240,0.01892906626065572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,768,0.00494400014479955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,8192,0.006587733328342438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,512,0.004802133142948151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,8192,0.018221867084503175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,7168,0.0063274666666984555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,256,0.004458666841189066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,7168,0.01844693422317505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,6144,0.006122666597366333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,512,128,0.004433066646258036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,6144,0.01824959913889567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,5120,0.006488533318042755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,5120,0.018295466899871826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,65536,0.07939199606577554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,4096,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,4096,0.017543466885884602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,16384,0.021991467475891112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,3584,0.006586666901906331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,12288,0.017644800742467246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,3584,0.01738986571629842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,3072,0.006223999957243601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,10240,0.015399466951688132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,3072,0.016645333170890807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,2560,0.006050133208433787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,2560,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,8192,0.013176533579826354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,2048,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,2048,0.015953066945075988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,7168,0.011945600310961407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,1536,0.004709333181381226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,6144,0.010705066720644633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,1536,0.015709867080052696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,1024,0.004035199930270513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,1024,0.015278933445612588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,5120,0.009739733735720317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,768,0.0036821333070596062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,768,0.015244799852371215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,4096,0.008556800087292989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,512,0.0034005333979924522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,512,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,3072,0.0074527998765309645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,3584,0.00825386643409729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,256,0.0030432000756263735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,256,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,2048,0.0062943999965985615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,2560,0.0069930667678515124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,128,0.002796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,128,0.014321066935857139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,64,0.002699733277161916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,64,0.014056533575057983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,256,32,0.0026496000587940215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,1024,0.005160533388455709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,256,32,0.014034133156140646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,1536,0.005901866654555003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,65536,0.011170132954915365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,768,0.00491946687301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,65536,0.02751893401145935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,16384,0.006262399752934774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,16384,0.018990933895111084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,12288,0.0061589335401852924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,12288,0.01858773430188497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,10240,0.006326400240262349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,10240,0.018601600329081217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,8192,0.0061589335401852924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,512,0.004836266736189524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,8192,0.018141865730285645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,7168,0.006080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,7168,0.01825173298517863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,256,0.004557866851488749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,6144,0.005983999868233999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,256,128,0.004329599936803182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,6144,0.018454400698343913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,5120,0.006363733112812043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,5120,0.018258132537206016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,65536,0.0785045305887858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,16384,0.021961599588394165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,4096,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,12288,0.017679999272028603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,4096,0.017694934209187826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,3584,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,3584,0.017058134078979492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,3072,0.0060703997810681665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,10240,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,3072,0.01720959941546122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,2560,0.006053333481152853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,2560,0.016477866967519125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,8192,0.013275733590126038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,2048,0.005373866856098175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,2048,0.015943466623624166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,7168,0.011811199784278869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,1536,0.004728533327579498
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,6144,0.010794666409492493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,1536,0.015742933750152587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,1024,0.0039818666875362395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,1024,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,5120,0.009657599528630574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,768,0.003702399879693985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,768,0.015219199657440185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,4096,0.008556800087292989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,512,0.003306666761636734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,512,0.014838400483131408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,3072,0.007358933488527934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,3584,0.008107733229796093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,256,0.003011200080315272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,256,0.014330666263898215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,128,0.002794666588306427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,128,0.014255999525388082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,2560,0.00699839989344279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,64,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,64,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,128,32,0.0026079999903837843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,2048,0.006331733365853627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,128,32,0.014109866817792258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,65536,0.008730666836102803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,65536,0.026207999388376875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,16384,0.006201600035031637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,1024,0.005275733272234599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,16384,0.01941759983698527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,12288,0.006165333092212677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,1536,0.005853866537412008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,12288,0.018521600961685182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,10240,0.006276266773541768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,768,0.005039999882380167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,10240,0.018940800428390504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,8192,0.006190933287143707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,8192,0.018081067005793254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,7168,0.0060597335298856105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,6144,0.01825493375460307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,7168,0.018396800756454466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,6144,0.005959466596444448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,5120,0.0062047998110453285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,5120,0.018552533785502114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,4096,0.005915733178456625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,4096,0.017402666807174682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,3584,0.006600533425807953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,3584,0.016768000523249307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,3072,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,2560,0.0162581334511439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,3072,0.01635840038458506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,2560,0.006049066781997681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,2048,0.005407999952634176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,512,0.004772266745567322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,2048,0.016101333498954772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,1536,0.00476800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,256,0.00456639975309372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,1536,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,1024,0.004126933217048645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,1024,0.01504746675491333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,768,0.003585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,64,128,128,0.004477866490681966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,768,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,512,0.0032511999209721885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,512,0.014434132973353067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,256,0.0031487998863061273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,256,0.014349866906801859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,32,0.0026506667335828146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,128,0.002705066651105881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,128,0.014221866925557455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,64,64,0.002629333237806956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,64,0.013980799913406372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,64,32,0.014184533556302389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,65536,0.009742933511734008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,65536,0.02627840042114258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,16384,0.006133333345254262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,16384,0.019347200791041054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,12288,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,12288,0.018331732352574667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,10240,0.006231466432412466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,10240,0.018449066082636516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,8192,0.006153599917888641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,8192,0.0180896004041036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,7168,0.0059114664793014525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,7168,0.018372267484664917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,6144,0.005793066819508871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,6144,0.01816320021947225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,5120,0.006084266801675161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,3584,0.01694613297780355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,5120,0.01797653237978617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,4096,0.005725866556167603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,4096,0.01744426687558492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,3584,0.006253866851329804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,3072,0.00584853341182073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,2560,0.006040533383687338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,3072,0.016770132382710776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,2560,0.016401066382726034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,1024,0.015126400192578635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,2048,0.005242666602134705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,2048,0.015946666399637856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,1536,0.0047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,1536,0.015452800194422403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,1024,0.004031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,768,0.0035594666997591654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,768,0.014739200472831726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,512,0.0033781332274278007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,512,0.014524799585342408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,256,0.002962133288383484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,256,0.014291200041770934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,128,0.00271573339899381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,128,0.014291200041770934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,64,0.0025386666258176167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,64,0.01386666695276896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,64,32,32,0.002552533398071925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,64,32,32,0.014046933253606161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,16384,0.2813141187032064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,16384,0.5292309443155925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,16384,0.27210772832234703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,12288,0.21458667119344077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,12288,0.39696426391601564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,12288,0.1967850685119629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,10240,0.33179521560668945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,10240,0.18237120310465496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,8192,0.2797973314921061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,8192,0.15238186518351238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,8192,0.13550186157226562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,7168,0.23372373580932618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,7168,0.13158613046010334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,7168,0.12698880036671956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,6144,0.20775252978006997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,6144,0.129694930712382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,5120,0.11084372997283935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,5120,0.16886506080627442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,4096,0.13713493347167968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,4096,0.08618666330973307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,3584,0.11884693304697673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,3584,0.07470186551411948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,3072,0.10419413248697917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,3072,0.0670261303583781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,2560,0.0868607997894287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,2560,0.0585098663965861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,2560,0.04877013365427653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,2048,0.07091519832611085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,2048,0.051430400212605795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,1536,0.05453866720199585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,10240,0.16627945899963378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,1536,0.042472533384958905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,1024,0.03865813414255778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,1024,0.03412373463312785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,6144,0.1065941333770752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,768,0.033779199918111166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,5120,0.09049493471781413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,768,0.02990506688753764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,4096,0.07236693700154623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,512,0.023075199127197264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,512,0.024320000410079957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,3584,0.06831573645273845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,3072,0.05668266614278158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,256,0.013427199920018515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,256,0.019989333550135293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,256,0.0134442667166392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,128,0.008261333405971526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,2048,0.040090668201446536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,128,0.018193066120147705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,64,0.0060586666067441305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,64,0.017242666085561117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,65536,32,0.00556160012880961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,1536,0.03439679940541585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,65536,32,0.017486933867136636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,1024,0.02481386661529541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,65536,0.5411370595296223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,65536,0.28385918935139975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,16384,0.1255616029103597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,16384,0.08721493085225424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,12288,0.09551680088043213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,768,0.02222933371861776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,12288,0.06420053243637085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,10240,0.08097173372904459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,10240,0.05654933452606201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,512,0.017092265685399375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,10240,0.047332266966501876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,8192,0.06544853448867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,8192,0.04854613145192464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,7168,0.05775573253631592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,7168,0.0441429336865743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,65536,128,0.011760000387827556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,6144,0.040532267093658446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,6144,0.05189760128657023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,6144,0.03125226696332296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,5120,0.042734932899475095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,5120,0.037043201923370364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,4096,0.035447466373443606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,4096,0.032994133234024045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,3584,0.03207146724065145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,3584,0.03057173291842143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,65536,0.26767145792643227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,16384,0.07737066745758056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,3072,0.030264532566070555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,3072,0.02804693380991618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,12288,0.054919465382893884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,2560,0.023707733551661173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,2560,0.025730133056640625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,2048,0.019717333714167277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,2048,0.02334400018056234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,7168,0.03535466591517131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,1536,0.017123200496037803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,1536,0.021357866128285725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,5120,0.027584000428517656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,8192,0.0392959992090861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,1024,0.0114847997824351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,4096,0.023119999965031942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,1024,0.019032533963521323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,768,0.009531733393669129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,1024,0.01002346674601237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,768,0.017972266674041747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,512,0.0073088000218073535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,3584,0.02142080068588257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,512,0.015974400440851848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,256,0.00461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,256,0.015312000115712484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,3072,0.019056000312169395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,128,0.003739733248949051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,2560,0.01713386575380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,128,0.015289599696795145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,64,0.0034847999612490333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,64,0.01516480048497518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,16384,32,0.003571200122435888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,2048,0.014556800325711569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,16384,32,0.015266133348147073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,65536,0.4001173337300618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,65536,0.22743040720621743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,1536,0.012430933117866517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,16384,0.11482986609141033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,16384,0.06799360116322836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,12288,0.08018026351928711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,12288,0.054628264904022214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,768,0.009173333644866943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,10240,0.06895360151926676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,10240,0.048547200361887616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,10240,0.037444265683492024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,512,0.007233066856861115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,256,0.006268799801667531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,8192,0.0537226676940918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,8192,0.04193600018819173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,7168,0.04693546692530314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,7168,0.03842560052871704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,6144,0.04073066711425781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,16384,128,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,6144,0.03529173135757446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,5120,0.03501226504643758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,5120,0.031939200560251874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,4096,0.028893866141637164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,4096,0.02880000074704488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,65536,0.20487252871195474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,3584,0.02762453357378642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,16384,0.05586346785227457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,12288,0.04305386543273926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,3584,0.027214932441711425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,3072,0.022545067469278972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,3072,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,8192,0.030884265899658203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,7168,0.02802026669184367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,2560,0.019630932807922365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,2560,0.02302079995473226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,6144,0.025139200687408447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,2048,0.016475733121236166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,2048,0.02178879976272583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,5120,0.022171733776728313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,1536,0.01308799982070923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,1536,0.01991466681162516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,1024,0.009795199831326802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,4096,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,1024,0.018332799275716148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,768,0.008210133512814839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,768,0.01712426741917928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,512,0.006523733337720235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,3584,0.017343999942143758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,512,0.015875200430552162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,256,0.003960533440113068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,256,0.015153066317240397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,3072,0.015430399775505066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,128,0.0036831999818483984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,128,0.014909866452217101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,2560,0.013677866260210673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,64,0.003278933217128118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,2048,0.01181653340657552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,12288,32,0.0033130665620168054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,64,0.014826666315396628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,1536,0.010391466816266378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,12288,32,0.014968533317248026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,1024,0.008712533116340637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,16384,0.09243093331654867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,65536,0.35713920593261717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,65536,0.19557013511657714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,768,0.00761706680059433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,16384,0.0652234673500061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,512,0.006055466830730438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,12288,0.07173333168029786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,12288,0.047332266966501876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,10240,0.06171093384424845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,10240,0.041740798950195314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,8192,0.05156160195668539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,256,0.005578666428724925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,8192,0.036423468589782716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,7168,0.038423466682434085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,12288,128,0.005036800106366476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,7168,0.034082134564717606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,6144,0.03384106556574504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,6144,0.03144320050875346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,5120,0.028387200832366944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,5120,0.028885332743326823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,5120,0.019458132982254028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,4096,0.02325119972229004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,4096,0.025641600290934246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,65536,0.17443200747172039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,3584,0.022980266809463502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,16384,0.052085332075754796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,3584,0.024965333938598632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,12288,0.03728426694869995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,3072,0.018948266903559365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,3072,0.02416960000991821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,3072,0.013317333658536276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,10240,0.032579199473063154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,2560,0.016270933548609416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,8192,0.027169066667556762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,2560,0.021719467639923096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,2048,0.013474133610725404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,2048,0.02049386699994405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,7168,0.02450773318608602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,1536,0.010859733819961548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,1536,0.019654399156570433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,6144,0.021927465995152794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,1536,0.009339732925097148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,1024,0.01824959913889567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,1024,0.00848746697107951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,768,0.0072543998559316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,768,0.01690559983253479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,512,0.005907199780146281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,512,0.015446399648984274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,4096,0.01673706571261088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,256,0.003853866706291834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,256,0.014882133404413859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,128,0.003533866753180822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,128,0.014710399508476257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,3584,0.015793066223462424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,64,0.0032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,64,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,2560,0.012281599640846252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,10240,32,0.003236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,10240,32,0.014793599645296732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,2048,0.010618666807810467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,65536,0.26538027127583824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,65536,0.15837972958882648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,1024,0.008067200084527333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,16384,0.07740693092346192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,16384,0.049525332450866696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,768,0.00639466643333435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,12288,0.054366934299469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,512,0.005814399818579356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,12288,0.04116373459498088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,12288,0.03145813345909119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,10240,0.04269546667734782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,10240,0.03712960084279378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,256,0.0052928000688552855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,8192,0.03531946738560994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,8192,0.03277759949366252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,7168,0.03106879989306132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,7168,0.03126186728477478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,6144,0.027315199375152588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,10240,128,0.00492799977461497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,6144,0.02871253291765849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,5120,0.02370880047480265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,5120,0.02585386633872986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,65536,0.1491754690806071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,4096,0.02183893322944641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,16384,0.0411296010017395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,4096,0.024035199483235677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,3584,0.0176362673441569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,3584,0.022756266593933105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,3072,0.015761066476504007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,10240,0.02758293350537618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,8192,0.023492266734441124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,3072,0.02168533404668172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,2560,0.013748266299565635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,2560,0.02060906688372294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,7168,0.02094506621360779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,2048,0.011362133423487346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,2048,0.0194815993309021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,6144,0.019287467002868652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,1536,0.009478400150934856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,1536,0.01816426714261373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,1024,0.007436800003051758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,5120,0.016785067319869996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,1024,0.016546133160591125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,768,0.00631573349237442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,768,0.01596799989541372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,4096,0.014390400052070618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,512,0.0044383997718493145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,512,0.01550933321317037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,3584,0.013380266229311624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,256,0.0036864000062147772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,3072,0.012097066640853882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,256,0.014840533336003622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,2560,0.01112000048160553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,128,0.0033919999996821085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,128,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,2048,0.009847467144330341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,64,0.003223466624816259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,64,0.014837333559989929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,8192,32,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,8192,32,0.015126400192578635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,1536,0.008777599533398945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,65536,0.24894612630208335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,65536,0.14757013320922852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,1024,0.006817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,16384,0.06981333096822104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,16384,0.0463210662206014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,768,0.006364800035953522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,12288,0.0457696000734965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,12288,0.038786133130391434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,12288,0.029999999205271403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,10240,0.0391487995783488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,10240,0.03497386773427327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,8192,0.031863466898600264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,8192,0.031436800956726074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,512,0.005791999896367391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,7168,0.02832319935162862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,7168,0.02922240098317464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,6144,0.02490026752154032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,256,0.005427200098832448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,6144,0.02685760060946147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,8192,128,0.005143466591835022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,5120,0.021547732750574748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,5120,0.024665600061416625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,4096,0.018196266889572144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,4096,0.02278719941775004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,65536,0.1384885311126709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,3584,0.017488000790278117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,16384,0.03880426486333211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,3584,0.02190399964650472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,3584,0.01214400033156077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,3072,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,10240,0.02648426691691081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,3072,0.02097919980684916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,2560,0.012360533078511555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,8192,0.02209386626879374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,2560,0.019686400890350342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,7168,0.019986132780710854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,2048,0.010628267129262289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,2048,0.018926932414372762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,1536,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,1536,0.0183242658774058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,6144,0.017570134003957114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,1024,0.006853333115577698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,1024,0.015331199765205384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,5120,0.015624533096949259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,768,0.0061247999469439185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,768,0.015921066204706825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,4096,0.013367467125256858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,512,0.0040853333969910945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,512,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,3072,0.011161599556605022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,256,0.0035573333501815797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,2560,0.010558933019638062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,256,0.014793599645296732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,2048,0.009343999624252319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,128,0.0034080001215140024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,128,0.014598400394121806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,1536,0.008195200065771738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,64,0.0031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,64,0.014737066626548768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,7168,32,0.003257599969704946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,1024,0.006427733103434245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,7168,32,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,65536,0.19703572591145832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,16384,0.055978667736053464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,65536,0.12401066621144612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,768,0.006027733286221823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,16384,0.045702401796976724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,12288,0.04443306525548299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,12288,0.03618133465449015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,10240,0.038430933157602945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,10240,0.033160533507665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,8192,0.029207466046015422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,512,0.005469866593678792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,8192,0.02993173400561015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,7168,0.025526400407155352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,7168,0.027674667040507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,256,0.005240533252557119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,6144,0.02272426684697469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,7168,128,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,6144,0.025547732909520466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,5120,0.019755733013153077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,5120,0.023715200026830037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,65536,0.12483946482340495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,16384,0.03618133465449015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,4096,0.017534933487574258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,12288,0.027534933884938557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,4096,0.02198186715443929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,3584,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,10240,0.02414720058441162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,3584,0.021229867140452066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,3072,0.013091199596722922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,3072,0.02034986615180969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,8192,0.02076693375905355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,2560,0.011547733346621196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,2560,0.01951573292414347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,7168,0.018557866414388023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,2048,0.010034132997194927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,2048,0.01875093380610148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,1536,0.008405333757400513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,1536,0.017164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,6144,0.016438399751981102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,1024,0.00647680014371872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,1024,0.015778133273124696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,5120,0.014681599537531533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,768,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,768,0.015485866864522298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,4096,0.012524799505869547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,512,0.004011733333269755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,3584,0.011494400103886922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,512,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,256,0.0034634667138258614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,3072,0.010622933506965637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,256,0.014751999576886495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,128,0.003305600086847941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,2560,0.00976639986038208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,128,0.01456106702486674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,64,0.0030773334205150605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,2048,0.008919466535250347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,64,0.014512000481287637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,6144,32,0.0031413334111372627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,6144,32,0.014681599537531533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,1536,0.007496533294518788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,65536,0.17584106127421062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,65536,0.11205653349558513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,16384,0.0519157330195109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,1024,0.006226133306821187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,16384,0.040387201309204104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,12288,0.037562668323516846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,12288,0.03227306604385376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,768,0.005796266595522562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,10240,0.0325653334458669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,10240,0.02948906620343526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,512,0.005385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,8192,0.02738773425420125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,8192,0.027102933327356978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,7168,0.02136533260345459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,256,0.00494400014479955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,7168,0.02473599910736084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,6144,128,0.0046079998215039575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,6144,0.018934400876363118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,6144,0.02345386743545532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,5120,0.01665066679318746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,5120,0.02186773419380188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,65536,0.11815893650054932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,16384,0.03367466529210408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,4096,0.014076800147692362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,12288,0.02571626702944438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,4096,0.020666666825612388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,3584,0.01241600016752879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,10240,0.022683733701705934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,3584,0.020068265994389854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,3072,0.010989866654078166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,8192,0.01962666710217794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,3072,0.019332265853881835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,7168,0.017110399405161538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,2560,0.009788800279299419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,2560,0.018924800554911296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,6144,0.015124266346295675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,2048,0.008499200145403545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,2048,0.017926400899887084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,1536,0.007349333167076111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,1536,0.017004799842834473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,1024,0.0059914668401082356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,5120,0.014600533246994018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,1024,0.015550933281580605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,768,0.004560000201066335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,768,0.015262933572133383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,4096,0.012206932902336121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,512,0.0038272000849246977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,3584,0.010959999759991963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,512,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,256,0.0034101332227389016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,256,0.014599466323852539
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,3072,0.010266666611035664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,128,0.0031328000128269195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,2560,0.009410132964452107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,128,0.014613333344459533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,64,0.0029567999144395193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,2048,0.008497066299120585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,64,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,5120,32,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,5120,32,0.014538666605949402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,1536,0.0069248000780741375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,65536,0.1377461274464925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,1024,0.006101333101590474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,65536,0.09192319711049399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,16384,0.0412778655687968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,16384,0.035997867584228516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,12288,0.03001280029614766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,768,0.005666133264700572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,12288,0.030665600299835206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,10240,0.02592639923095703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,10240,0.02832000056902568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,512,0.005373866856098175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,8192,0.023091200987497965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,8192,0.02550293405850728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,7168,0.0197760005791982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,256,0.0048981333772341405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,7168,0.024155733982721965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,6144,0.017806933323542277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,6144,0.02278719941775004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,5120,128,0.004642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,5120,0.015777066349983215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,5120,0.020707199970881142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,65536,0.10122133096059163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,4096,0.013489066561063131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,16384,0.03182613253593445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,4096,0.019795199235280357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,12288,0.022265599171320597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,3584,0.012355200449625651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,10240,0.020201599597930907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,3584,0.019211733341217042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,3072,0.011362133423487346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,3072,0.018528000513712565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,8192,0.016785067319869996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,3072,0.009399466713269551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,2560,0.008504533767700195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,2560,0.01808746655782064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,2048,0.00767680009206136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,7168,0.014942933122316995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,2048,0.01672640045483907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,1536,0.006515199939409892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,6144,0.013957333564758301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,1536,0.016152532895406087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,1024,0.004952533543109894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,1536,0.006649599969387054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,1024,0.015549866358439126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,768,0.0039658665657043455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,5120,0.012195199728012085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,768,0.015267200271288552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,512,0.003701333453257879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,512,0.014993066589037577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,512,0.005293866495291392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,256,0.0032960000137488045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,256,0.014588800072669984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,4096,0.011229866743087768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,128,0.0030570665995279947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,128,0.014454399545987448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,3584,0.010315733154614766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,64,0.002890666574239731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,64,0.01446613371372223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,4096,32,0.0030933332939942675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,4096,32,0.014751999576886495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,2560,0.008801066875457763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,65536,0.1290175994237264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,65536,0.08529386520385743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,2048,0.007536000013351441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,16384,0.03414186636606852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,16384,0.03343679904937744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,12288,0.02744213342666626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,1024,0.005885866781075796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,12288,0.029028266668319702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,10240,0.023907200495402018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,768,0.005505066613356272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,10240,0.02645546595255534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,10240,0.018351999918619792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,8192,0.020537600914637247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,8192,0.02434239983558655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,256,0.004993066688378652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,7168,0.018270933628082277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,7168,0.023517866929372154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,4096,128,0.004675200084845225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,6144,0.0164192001024882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,6144,0.022338134050369263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,5120,0.014460800091425577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,5120,0.02063573400179545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,4096,0.012570666273434958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,65536,0.0964949369430542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,4096,0.019299199183781944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,4096,0.010949333508809406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,16384,0.028468267122904463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,3584,0.011478400230407715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,3584,0.019153066476186118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,12288,0.02097599903742472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,3072,0.010601600011189777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,3072,0.018480000893274943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,2560,0.00820479989051819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,2560,0.01760853330294291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,2560,0.008258133133252462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,2048,0.007331199944019318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,2048,0.016203733285268147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,8192,0.015440000096956888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,1536,0.006357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,1536,0.01573013365268707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,1024,0.004642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,7168,0.014084266622861228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,1024,0.015408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,6144,0.012793599565823873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,768,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,768,0.015132799744606018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,512,0.0036949334045251214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,512,0.015003732840220132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,5120,0.011542399724324543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,256,0.003286399940649668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,256,0.014536533753077188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,3584,0.009843200445175171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,128,0.003005866706371307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,3072,0.009052800138791402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,128,0.01425386667251587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,64,0.0028031999866167706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,64,0.014257066448529563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,2048,0.007144533097743988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3584,32,0.0028927999238173166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3584,32,0.014607999722162882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,1536,0.0065087998906771345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,65536,0.10333120028177897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,65536,0.0780191977818807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,16384,0.03370453516642253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,1024,0.0057536001006762184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,16384,0.0322762668132782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,12288,0.024759467442830405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,12288,0.027297067642211913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,768,0.005486933390299479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,10240,0.021592533588409422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,10240,0.0254314661026001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,8192,0.01837653319040934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,512,0.0051807999610900875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,8192,0.023897600173950196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,7168,0.016684800386428833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,7168,0.023127466440200806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,256,0.004867200056711833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3584,128,0.0047978664437929785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,6144,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,6144,0.02103360096613566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,5120,0.012636799613634744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,5120,0.02025066614151001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,65536,0.10265386899312337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,16384,0.029445334275563555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,4096,0.011733333269755047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,4096,0.019076265891393027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,12288,0.021828265984853108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,3584,0.010947199662526448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,3584,0.018781866629918417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,10240,0.02003306746482849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,3072,0.009964799880981446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,3072,0.017915733655293784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,8192,0.016408532857894897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,2560,0.007842133442560833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,2560,0.016696532567342125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,2048,0.007077333331108093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,7168,0.014863999684651694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,2048,0.01665600041548411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,1536,0.006066133578618368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,6144,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,1536,0.016586666305859886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,1536,0.006285866598288219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,1024,0.004444799820582072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,5120,0.012033067146937053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,1024,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,768,0.004023466755946477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,768,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,4096,0.010732799768447876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,512,0.0035797332723935447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,512,0.014656000336011252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,3584,0.00997759997844696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,256,0.003316266586383184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,3072,0.008703999718030294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,256,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,2560,0.00834986666838328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,128,0.0030154667794704436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,128,0.01434346636136373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,64,0.0028533334533373516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,2048,0.0071263998746871945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,64,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,3072,32,0.0029120000700155893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,3072,32,0.014570666352907815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,65536,0.09114133516947429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,65536,0.06848106384277344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,16384,0.026764800151189167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,1024,0.005739733576774597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,16384,0.028861866394678755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,12288,0.02151573300361633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,768,0.005398400127887726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,12288,0.024946133295694985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,10240,0.0185098667939504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,10240,0.023331199089686075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,512,0.004982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,8192,0.01578986644744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,8192,0.022025599082310995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,7168,0.01411946713924408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,256,0.004789333542188009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,7168,0.020966400702794395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,6144,0.012601600090662638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,3072,128,0.004556799928347269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,6144,0.020713599522908528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,5120,0.011379200220108032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,5120,0.019476266702016194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,65536,0.09659732977549235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,4096,0.01030293305714925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,4096,0.019146666924158732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,16384,0.027672533194224042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,3584,0.009661866227785747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,12288,0.021182932456334434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,3584,0.018532266219456993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,3584,0.009638399879137675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,10240,0.018374399344126383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,3072,0.009077333410580953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,3072,0.01686506668726603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,2560,0.0078005333741505934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,8192,0.01597119967142741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,2560,0.016615466276804606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,2048,0.0068896000583966565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,2048,0.016246400276819863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,7168,0.01455466647942861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,1536,0.0054506664474805195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,1536,0.015829333662986757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,6144,0.013118933637936911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,1024,0.004385066529115042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,1024,0.015575466553370157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,768,0.004036266605059306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,5120,0.011780266960461933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,768,0.015118933717409768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,512,0.0035402665535608927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,512,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,4096,0.010446932911872864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,256,0.0032821332414944967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,256,0.014731733004252115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,3072,0.00804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,128,0.002994133283694585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,128,0.014361600081125895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,2048,0.007124266525109608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,64,0.0028319999575614927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,1536,0.006177066763242086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,64,0.014333867033322654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2560,32,0.002885333448648453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2560,32,0.014428800344467163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,1024,0.005636266867319743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,65536,0.06990506649017333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,65536,0.06010986566543579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,16384,0.021913599967956544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,65536,0.09117866357167562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,768,0.005273599922657013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,16384,0.02603093385696411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,12288,0.01775253415107727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,12288,0.02297066648801168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,512,0.005037866532802582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,10240,0.015608533223470052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,10240,0.022485333681106567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,8192,0.013689600427945457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,8192,0.021432532866795858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,7168,0.012578133742014566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,2560,0.007538133362929027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,7168,0.021449599663416544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,256,0.004884266853332519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,6144,0.011653332908948263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,6144,0.020482132832209267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2560,128,0.004537599782148996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,5120,0.010666666428248088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,5120,0.019592533508936562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,16384,0.02488320072491964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,4096,0.009505066275596618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,4096,0.018388267358144125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,12288,0.019654399156570433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,10240,0.017517866690953572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,3584,0.008764800429344178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,3584,0.017946666479110716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,3072,0.008036266764005024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,8192,0.015236266454060874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,3072,0.0167797327041626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,2560,0.007459199925263722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,2560,0.016432000199953715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,2048,0.0066453332702318835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,6144,0.012321066856384278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,2048,0.016182399789492288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,1536,0.005402666827042898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,5120,0.010875733693440755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,1536,0.015692800283432007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,1024,0.004260266820589701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,1024,0.015577600399653117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,4096,0.00920960009098053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,768,0.0040224000811576845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,768,0.0151829332113266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,3584,0.008733866612116496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,7168,0.013418666521708169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,512,0.0037429332733154297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,3072,0.007934933404127757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,512,0.014778666694959006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,256,0.0031648000081380212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,2560,0.007237333556016285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,256,0.014447999993960061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,128,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,128,0.01437333325544993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,64,0.0028512001037597655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,2048,0.007144533097743988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,64,0.014446933070818582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,2048,32,0.0028736000259717304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,2048,32,0.014419200023015341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,65536,0.057625599702199305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,1536,0.006231466432412466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,65536,0.05206826527913412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,16384,0.018639999628067016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,1024,0.005719466507434845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,16384,0.02373866637547811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,12288,0.01476800044377645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,12288,0.02183786630630493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,12288,0.01844586730003357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,10240,0.012654933333396911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,768,0.005347200234731038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,10240,0.02068159977595011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,8192,0.011291733384132386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,8192,0.019789866606394448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,512,0.004990933338801066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,7168,0.010618666807810467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,7168,0.01960106690724691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,256,0.004804266492525736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,6144,0.009858133395512898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,2048,128,0.004702933132648468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,6144,0.019858133792877198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,5120,0.008985599875450135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,5120,0.018153599898020425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,4096,0.008286933104197185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,4096,0.01759679913520813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,65536,0.0792138655980428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,3584,0.008182399968306223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,16384,0.0230730672677358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,3584,0.017499732971191406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,3072,0.007853866616884867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,3072,0.017298134167989095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,8192,0.013833600282669067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,2560,0.006687999765078227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,7168,0.012877866625785828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,2560,0.016476800044377647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,2048,0.00561599979797999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,6144,0.011640533804893494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,2048,0.016051200032234193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,1536,0.0049685334165891016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,5120,0.010136533776919048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,1536,0.01567359964052836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,1024,0.004229333500067393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,1024,0.015320533514022827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,4096,0.008774399757385254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,768,0.0038751999537150065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,768,0.014750933647155762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,3584,0.008346666892369587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,10240,0.01612053314844767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,512,0.0035616000493367515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,512,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,3072,0.007573333382606506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,256,0.0032277333239714304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,256,0.014453333616256715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,128,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,128,0.014257066448529563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,2560,0.007090133428573608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,2048,0.006609066824118296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,64,0.002792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,64,0.014137599865595499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,1536,0.005973333120346069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1536,32,0.0027104000250498454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1536,32,0.014426666498184203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,65536,0.03994239966074626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,1024,0.0053951998551686605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,65536,0.04213333527247111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,16384,0.013991467157999673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,16384,0.021875200668970744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,768,0.005097599824269613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,12288,0.011884799599647522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,12288,0.02147946755091349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,512,0.004881066580613455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,10240,0.010917333761850993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,10240,0.020487467447916664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,256,0.004613333443800608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,8192,0.009524266918500264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,8192,0.019718400637308755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,8192,0.013517866532007853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,7168,0.008924800157546996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1536,128,0.004433066646258036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,7168,0.01865066687266032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,6144,0.009640533725420635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,6144,0.0185973326365153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,5120,0.008923733234405517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,5120,0.018874667088190713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,4096,0.008353066444396973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,4096,0.01773866613705953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,65536,0.0784437338511149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,3584,0.007540266712506611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,16384,0.022935465971628825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,3584,0.01710933248202006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,12288,0.018346667289733887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,3072,0.006962133447329204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,3072,0.016774400075276693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,2560,0.006697600086530049
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,10240,0.01591146687666575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,2560,0.016666666666666666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,2048,0.0056426664193471275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,2048,0.016328533490498863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,7168,0.012108799815177918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,1536,0.004931200047334035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,6144,0.010819199681282043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,1536,0.015812266866366068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,5120,0.009758933385213216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,1024,0.004274133344491323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,1024,0.015315199891726175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,768,0.0038304001092910765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,4096,0.008578133583068848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,768,0.015238400300343832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,512,0.0034944000343481696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,3584,0.008237866560618083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,512,0.014775466918945313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,256,0.003171200056870779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,3072,0.007415466507275899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,256,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,128,0.00279573326309522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,2560,0.007042133311430614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,128,0.014301866292953491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,64,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,2048,0.006377600133419037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,64,0.01455573340257009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,1024,32,0.002762666592995326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,1024,32,0.014428800344467163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,65536,0.03218560020128886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,65536,0.03805973529815674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,1536,0.00590826670328776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,16384,0.01125333309173584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,1024,0.005246933301289876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,16384,0.020584533611933388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,12288,0.009516800443331402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,12288,0.019377066691716512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,768,0.004953599969546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,10240,0.009161600470542907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,10240,0.019811199108759562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,8192,0.008564266562461852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,512,0.0048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,8192,0.018619734048843383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,7168,0.007946666578451793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,256,0.004610133171081543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,7168,0.01895573337872823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,6144,0.008628267049789428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,1024,128,0.004452266792456309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,6144,0.018424532810846963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,5120,0.007733333110809326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,5120,0.01839359998703003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,5120,0.00962773362795512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,4096,0.006740266581376393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,65536,0.07778666814168295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,4096,0.01753386656443278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,3584,0.006681600213050842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,16384,0.02286720077196757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,3584,0.017058134078979492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,12288,0.018323200941085815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,3072,0.006354133288065593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,3072,0.01665386656920115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,2560,0.006163200239340464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,10240,0.01563093364238739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,2560,0.01660266617933909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,8192,0.012853333353996277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,2048,0.005555200080076853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,7168,0.011872000495592753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,2048,0.016129066546758018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,1536,0.00486826648314794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,1536,0.0155349334081014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,6144,0.010644267002741497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,1024,0.004161066561937332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,1024,0.01513813336690267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,4096,0.008605866630872091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,768,0.003787733366092046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,768,0.015069866180419922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,3584,0.008072533210118612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,512,0.0034474665919939675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,3072,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,512,0.015092266599337259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,256,0.003014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,2560,0.007037866612275441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,256,0.014251733819643656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,128,0.0027690666417280836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,128,0.01404159963130951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,2048,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,64,0.0027146667242050173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,64,0.01407360037167867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,768,32,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,1536,0.005905066430568695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,768,32,0.014272000392278036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,65536,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,65536,0.03305173317591349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,16384,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,16384,0.01989226738611857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,1024,0.005178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,12288,0.009458133578300476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,12288,0.01839039921760559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,10240,0.008690133690834045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,768,0.005046399931112925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,10240,0.019066667556762694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,8192,0.00795306662718455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,512,0.004760533571243286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,8192,0.018736000855763754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,256,0.004699733356634776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,7168,0.007319466769695282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,7168,0.018844799200693766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,6144,0.006773333251476288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,768,128,0.004408533374468485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,6144,0.018278400103251137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,5120,0.0066442668437957765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,5120,0.01832533280054728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,65536,0.07806399663289389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,4096,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,16384,0.022282665967941283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,4096,0.017237333456675212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,12288,0.017607466379801432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,3584,0.0066655998428662615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,3584,0.01703146696090698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,10240,0.015056000153223673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,3072,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,3072,0.0165173331896464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,2560,0.006155733267466227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,8192,0.012814933061599731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,2560,0.016511999567349753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,2560,0.006885333359241486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,7168,0.011718400319417318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,2048,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,2048,0.01609386702378591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,1536,0.004785066843032837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,6144,0.010630399982134501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,1536,0.015689599514007568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,1024,0.004020266731580098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,5120,0.009513599673906963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,1024,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,768,0.0037109332780043284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,768,0.015080533425013223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,512,0.003435733417669932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,512,0.014535466829935709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,4096,0.008521599570910136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,256,0.0031040000418821974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,3584,0.008038400113582611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,256,0.01416853368282318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,128,0.002845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,3072,0.007348266740640004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,128,0.014100266496340432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,64,0.002616533388694127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,64,0.014254933595657349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,512,32,0.0026101333399613695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,2048,0.006292266647020976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,512,32,0.014144000411033631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,65536,0.015317333738009134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,1536,0.0058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,65536,0.027934932708740236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,16384,0.007898666461308797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,1024,0.005151999990145365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,16384,0.019433599710464478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,12288,0.006586666901906331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,768,0.005019733309745788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,12288,0.018463999032974243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,10240,0.00658240020275116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,10240,0.018467199802398682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,8192,0.006451199948787689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,512,0.004746666550636292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,8192,0.017950934171676636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,256,0.004609066744645437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,7168,0.00621013343334198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,512,128,0.004439466694990794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,7168,0.018590933084487914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,6144,0.006090666850407918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,6144,0.018237866957982383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,65536,0.07751146952311197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,16384,0.02189226746559143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,5120,0.006487466891606649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,5120,0.018263467152913413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,5120,0.009525332848230999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,12288,0.01728640000025431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,4096,0.006138666470845541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,4096,0.01778879960378011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,3584,0.006492800017197926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,10240,0.015025066335995993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,3584,0.017100799083709716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,3072,0.0062165334820747375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,3072,0.016849066813786825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,2560,0.006154666841030121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,8192,0.012801067034403483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,2560,0.016317866245905557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,2048,0.005352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,7168,0.011918933192888895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,2048,0.016314666469891867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,1536,0.004727466901143392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,1536,0.015336533387502035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,1024,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,6144,0.010681600371996561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,1024,0.015980799992879234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,768,0.0036309334139029183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,768,0.014840533336003622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,768,0.00491839994986852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,512,0.003368533402681351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,512,0.014727466305096946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,4096,0.00844373305638631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,3584,0.008039466540018718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,256,0.0028959999481836954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,256,0.014521599809328715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,3072,0.007502933343251546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,128,0.00275093341867129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,128,0.01423679987589518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,128,0.004330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,64,0.0026986666023731233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,2560,0.006885333359241486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,64,0.0141184002161026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,256,32,0.0027295999228954316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,2048,0.00631466656923294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,256,32,0.014282666643460593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,65536,0.010621866583824158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,65536,0.025566933552424113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,16384,0.006252799928188324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,1536,0.005852800110975901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,16384,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,12288,0.006209066510200501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,1024,0.005166933437188466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,12288,0.018313600619633993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,10240,0.0064416001240412395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,10240,0.018755199511845906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,8192,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,8192,0.018363734086354576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,7168,0.006217599908510844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,7168,0.018761599063873292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,512,0.0047765334447224935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,6144,0.006025599936644236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,6144,0.017915733655293784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,256,256,0.004497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,5120,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,5120,0.018049067258834837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,65536,0.07665812969207764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,4096,0.006055466830730438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,16384,0.021848533550898233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,4096,0.017318399747212727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,12288,0.017293866475423178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,3584,0.006586666901906331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,3584,0.016925867398579916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,10240,0.014938666423161825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,3072,0.006131199995676676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,3072,0.01653333306312561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,8192,0.012711466352144877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,2560,0.006066133578618368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,2560,0.016669867436091106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,2048,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,7168,0.011725866794586181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,2048,0.01585599978764852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,2048,0.006305066744486491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,1536,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,1536,0.01553813318411509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,6144,0.010628267129262289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,1024,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,1024,0.015057067076365152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,768,0.0037503999968369803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,768,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,4096,0.008437333504358928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,512,0.00344106654326121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,512,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,3584,0.008150400221347808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,3072,0.007292800148328145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,256,0.0029824001093705496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,2560,0.006989866495132446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,256,0.014597333470980325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,5120,0.009502933422724406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,128,0.002762666592995326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,128,0.014045866330464682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,64,0.002533333251873652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,1536,0.005805866420269012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,64,0.014290133118629455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,128,32,0.0026367999613285064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,1024,0.005256533126036326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,128,32,0.014052266875902811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,65536,0.008506666620572407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,65536,0.024167466163635253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,16384,0.006197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,16384,0.019036799669265747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,12288,0.0061258668700853985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,768,0.005064533154169718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,12288,0.018270933628082277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,10240,0.01840959986050924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,10240,0.0063733334342638654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,8192,0.00617386649052302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,7168,0.006098133325576782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,8192,0.018149334192276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,7168,0.018247467279434205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,6144,0.005941333373387655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,6144,0.017926400899887084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,5120,0.006277333199977875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,5120,0.01796906590461731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,512,0.004760533571243286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,4096,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,4096,0.017090133825937905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,3584,0.006324266890684764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,3584,0.01681706706682841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,3072,0.0060479998588562015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,256,0.004540800054868063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,2560,0.006064000229040781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,3072,0.016713599363962807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,2560,0.016405333081881204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,2048,0.005388799806435903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,48,128,128,0.004323199888070424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,2048,0.015901866555213928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,1536,0.004743466774622599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,1536,0.015451733271280924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,1024,0.0040277334551016486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,1024,0.01502293348312378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,768,0.003619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,768,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,512,0.0033344000577926634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,512,0.014847999811172486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,256,0.0030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,256,0.01444906691710154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,128,0.0028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,128,0.01418560047944387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,64,0.0026015999416510267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,64,0.014139733711878457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,64,32,0.0026335999369621276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,64,32,0.014251733819643656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,65536,0.009076266487439474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,16384,0.006126933296521505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,65536,0.024310400088628135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,12288,0.0060479998588562015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,8192,0.017723733186721803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,16384,0.018667733669281004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,12288,0.01816213329633077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,10240,0.006286933521429698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,10240,0.018269866704940796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,8192,0.00613973339398702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,7168,0.00606826643149058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,7168,0.018269866704940796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,6144,0.005881600081920624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,6144,0.017824000120162962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,5120,0.006206933160622915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,5120,0.018093866109848023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,4096,0.0058442667126655575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,4096,0.017416532834370932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,3584,0.006253866851329804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,3584,0.016806399822235106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,3072,0.0059456000725428265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,3072,0.016794667641321818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,2560,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,2560,0.01656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,2048,0.005277866621812185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,2048,0.016169599692026772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,1536,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,1536,0.015990400314331056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,1024,0.003927466770013174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,1024,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,768,0.0035349334279696143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,768,0.01487573285897573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,512,0.003340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,512,0.014657066265741984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,256,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,256,0.014146133263905843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,128,0.0027903998891512555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,128,0.014165332913398743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,64,0.002598399917284648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,64,0.014196266730626425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,48,32,32,0.002567466596762339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,48,32,32,0.014098133643468222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,16384,0.5261098543802898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,16384,0.2863701184590658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,12288,0.2127903938293457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,12288,0.3948053359985352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,10240,0.3295210520426432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,10240,0.20407466888427733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,8192,0.2746815999348958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,8192,0.13505813280741374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,8192,0.15587306022644043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,7168,0.23231040636698402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,7168,0.1307477315266927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,6144,0.1992586612701416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,6144,0.11553599834442138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,6144,0.10321386655171712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,5120,0.16764160792032878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,5120,0.09867626825968424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,4096,0.13538986841837566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,4096,0.08220160007476807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,3584,0.11810879707336426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,3584,0.07358720302581787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,16384,0.2673365275065104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,12288,0.19688960711161296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,3072,0.10733973185221354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,3072,0.06858452955881754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,10240,0.17160746256510417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,2560,0.08592320283253987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,2560,0.05749119917551676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,2048,0.0700704018274943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,2048,0.05011733373006185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,7168,0.11874346733093262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,1536,0.053683201471964516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,1536,0.044210131963094076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,1536,0.031941332419713336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,1024,0.03766826788584392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,1024,0.033316266536712644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,5120,0.08847253322601319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,768,0.029312000672022505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,768,0.02912213404973348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,512,0.02094506621360779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,4096,0.07255146503448487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,512,0.023668267329533896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,256,0.011852799852689107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,256,0.019423999389012656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,3584,0.06426239808400472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,128,0.008431999882062276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,128,0.0166101336479187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,3072,0.05789546569188436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,128,0.01118293305238088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,2560,0.05198506514231364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,64,0.0056618665655454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,64,0.016722132762273155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,65536,32,0.005206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,65536,32,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,2048,0.04000106652577718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,16384,0.12588480313618977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,1024,0.024177066485087075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,65536,0.5346783955891927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,65536,0.28571627934773763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,768,0.020849066972732543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,16384,0.07900266647338867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,12288,0.10840960343678792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,12288,0.0704533338546753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,12288,0.05477226575215658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,512,0.01660053332646688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,10240,0.08090773423512777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,10240,0.05588266849517822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,8192,0.06512960195541381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,8192,0.04766506751378377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,65536,256,0.01353600025177002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,7168,0.05783679882685343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,7168,0.04341760079065959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,6144,0.050164266427357995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,6144,0.03981226682662964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,5120,0.0426144003868103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,5120,0.03555519978205363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,4096,0.03531519969304402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,4096,0.03309973279635112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,65536,0.2678048133850098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,16384,0.07107093334197997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,3584,0.034170667330423996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,3584,0.029971200227737426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,3072,0.027180800835291546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,10240,0.047976533571879074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,3072,0.02790293296178182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,2560,0.023427200317382813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,8192,0.03914773464202881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,2560,0.025354667504628496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,7168,0.03494720061620076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,2048,0.019345066944758096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,2048,0.022909865776697794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,6144,0.03101973334948222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,1536,0.015496533115704855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,1536,0.020887466271718343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,5120,0.027112533648808796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,1024,0.011191466450691223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,1024,0.01874986688296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,768,0.009152000149091084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,768,0.01795626680056254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,768,0.008999466896057129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,512,0.007247999807198842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,3584,0.02130026618639628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,512,0.015161599715550741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,512,0.007275733351707459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,3072,0.01916266679763794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,256,0.004215466479460398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,256,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,128,0.0036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,2560,0.01691733400026957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,128,0.015059199929237366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,2048,0.014475733041763306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,64,0.0033589333295822145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,64,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,16384,32,0.0034495999415715536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,1536,0.012190933028856914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,16384,32,0.015068800250689188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,1024,0.010003200173377991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,4096,0.022983467578887938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,16384,0.1012885332107544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,65536,0.41132373809814454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,65536,0.22086505889892577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,16384,0.06796159744262695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,12288,0.08913919925689698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,12288,0.05878719886144003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,10240,0.06860586802164713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,10240,0.04859946568806966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,8192,0.05529173215230306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,8192,0.04166826804478963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,256,0.006222933530807495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,7168,0.048262401421864824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,7168,0.03767253160476684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,7168,0.029095466931660968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,16384,128,0.005614933371543884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,6144,0.041994667053222655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,6144,0.034833065668741864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,5120,0.035605335235595705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,5120,0.03196053306261699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,5120,0.022197333971659343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,4096,0.028981333971023558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,4096,0.02809813419977824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,16384,0.06120959917704264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,65536,0.21338133811950682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,3584,0.025600000222524004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,3584,0.02643839915593465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,3584,0.018369066715240478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,3072,0.0226474662621816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,3072,0.02433919906616211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,2560,0.019452800353368126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,2560,0.022694400946299233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,10240,0.040491731961568196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,2048,0.015770666797955833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,2048,0.02109866738319397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,2048,0.012044800321261089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,1536,0.01274773379166921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,1536,0.019568000237147012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,8192,0.03245439926783244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,1536,0.010460799932479859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,1024,0.009468799829483033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,1024,0.01811520059903463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,6144,0.025255467494328814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,768,0.008017066617806752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,768,0.016809600591659545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,768,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,512,0.006275199850400289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,512,0.006469333171844482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,512,0.015320533514022827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,12288,0.04550293286641439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,256,0.0038922667503356934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,4096,0.019342933098475137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,256,0.014958932995796204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,128,0.014932266871134438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,256,0.005541333556175232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,128,0.003568000098069509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,3072,0.015941333770751954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,64,0.0031946666538715364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,64,0.014661332964897156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,12288,32,0.00323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,2560,0.013936000068982443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,12288,32,0.01470186710357666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,65536,0.19723092714945475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,65536,0.34702399571736653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,16384,0.09009280204772949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,16384,0.06210026741027832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,16384,0.055387731393178305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,12288,0.07347946961720785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,1024,0.008714666962623597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,12288,0.05041706562042236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,10240,0.05977813402811686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,10240,0.041946665445963545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,10240,0.03565013408660889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,8192,0.04924693504969279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,8192,0.036685868104298906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,7168,0.038916265964508055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,7168,0.03400213321050008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,6144,0.03377813498179118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,12288,128,0.005064533154169718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,6144,0.03107306758562724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,5120,0.029080533981323244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,5120,0.028642133871714277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,4096,0.024081067244211832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,4096,0.025682133436203004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,4096,0.01781546672185262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,3584,0.02145386735598246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,3584,0.024158932765324912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,65536,0.20240747133890785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,3584,0.01615466674168905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,3072,0.0201855997244517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,3072,0.022842667500178018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,2560,0.016293332974116007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,12288,0.04103893438975016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,2560,0.021923200289408366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,2048,0.013371733824412027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,8192,0.029124265909194945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,2048,0.0205567995707194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,7168,0.027322665850321455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,1536,0.010843732953071594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,1536,0.018396800756454466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,6144,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,1024,0.008145066599051159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,1024,0.017541333039601644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,768,0.007129600147406261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,768,0.015468800067901611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,512,0.0056991999348004665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,512,0.015293866395950317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,3072,0.014356266458829245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,256,0.0036831999818483984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,256,0.015011200308799743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,2560,0.012750933567682901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,256,0.0053951998551686605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,128,0.003389866650104523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,128,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,2048,0.011106133460998535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,64,0.003190399954716364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,64,0.014654933412869772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,5120,0.020504534244537354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,10240,32,0.0031040000418821974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,1536,0.00992746651172638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,10240,32,0.01474453310171763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,1024,0.008229333162307739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,65536,0.2619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,16384,0.06847786903381348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,65536,0.15263466835021972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,768,0.0067552000284194945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,16384,0.049167998631795246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,16384,0.05025920073191324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,12288,0.0529258648554484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,12288,0.04292159875233968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,512,0.006108800073464712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,10240,0.043747198581695554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,10240,0.03621439933776856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,8192,0.034977066516876223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,8192,0.03246399958928426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,7168,0.03113173246383667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,7168,0.030036266644795733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,6144,0.027133866151173906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,6144,0.02799786726633708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,6144,0.022022400299708048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,5120,0.023358933130900063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,10240,128,0.00484799991051356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,5120,0.025544534126917522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,4096,0.019873066743214925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,4096,0.023277866840362548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,4096,0.01618773341178894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,3584,0.01773866613705953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,65536,0.16875093777974445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,3584,0.022613332668940226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,3072,0.01676373283068339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,12288,0.037641600767771406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,3072,0.021303466955820718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,2560,0.013366400202115377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,2560,0.020116267601648967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,10240,0.03257173299789429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,2048,0.011036800344785054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,8192,0.027720532814661664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,2048,0.018978132804234823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,1536,0.009340799848238627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,7168,0.024435200293858848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,1536,0.018027732769648232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,1024,0.007267199953397115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,1024,0.016286933422088624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,5120,0.01883626580238342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,768,0.0061589335401852924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,768,0.015524267156918844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,512,0.0043381333351135256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,512,0.01542080044746399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,512,0.00603413333495458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,256,0.0035968000690142312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,256,0.014944000045458474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,3072,0.013737600048383078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,128,0.0033269333342711128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,2560,0.01204373339811961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,128,0.01456106702486674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,64,0.0030154667794704436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,2048,0.011099732915560405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,64,0.014673067132631936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,8192,32,0.0032586666444937387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,1536,0.009436800082524618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,8192,32,0.01476479967435201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,1024,0.0077567999561627705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,65536,0.2457141399383545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,65536,0.14336640040079754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,16384,0.06244586706161499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,16384,0.04803626537322998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,768,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,12288,0.04550506671269734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,3584,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,12288,0.037905065218607585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,10240,0.03917760054270426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,10240,0.034601600964864095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,8192,0.03210026621818542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,256,0.005592533449331919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,8192,0.030392533540725707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,8192,0.026053333282470705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,7168,0.02820693254470825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,7168,0.02887786626815796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,8192,128,0.005003733436266581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,6144,0.02476159930229187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,6144,0.026294400294621784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,6144,0.02076693375905355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,5120,0.021477333704630532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,5120,0.02500586708386739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,4096,0.01813546617825826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,4096,0.022499199708302817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,65536,0.16655999819437664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,3584,0.01727679967880249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,16384,0.04835626681645711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,3584,0.021713066101074218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,3072,0.014108799894650779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,12288,0.03623893260955811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,3072,0.02034986615180969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,3072,0.012919466694196066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,2560,0.012244266271591187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,2560,0.019825067122777304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,2048,0.010492799679438274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,2048,0.018703999121983846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,1536,0.008804266651471455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,1536,0.017494400342305504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,7168,0.02340373396873474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,1024,0.006763733426729838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,1024,0.0152319997549057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,5120,0.018041600783665977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,1024,0.007162666817506154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,768,0.005902933577696482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,768,0.015917866428693136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,4096,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,512,0.003913599997758865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,512,0.015019733707110086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,256,0.0034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,256,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,10240,0.0312991996606191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,3584,0.014883200327555338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,128,0.0032149332265059153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,128,0.014354133605957031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,2560,0.011730133493741354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,64,0.0029077333708604175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,64,0.014509866635004679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,2048,0.010082133611043294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,7168,32,0.002930133293072383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,7168,32,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,1536,0.008779733379681905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,65536,0.1940245310465495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,65536,0.11803092956542968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,16384,0.0556885321935018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,65536,0.15951573053995768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,16384,0.045269334316253663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,16384,0.046562135219573975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,12288,0.04404053290685018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,768,0.00633493314186732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,12288,0.035633067289988204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,10240,0.03769280115763347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,10240,0.032096000512441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,8192,0.028810666004816694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,512,0.005694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,8192,0.028914133707682293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,7168,0.025463465849558515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,7168,0.02696320017178853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,256,0.0051466668645540874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,6144,0.022705066204071044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,6144,0.025587199131647746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,5120,0.01960853338241577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,5120,0.023436800638834635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,4096,0.01663253307342529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,4096,0.021891200542449953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,12288,0.03466879924138387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,3584,0.014566399653752646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,10240,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,3584,0.02169813315073649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,3584,0.01366933286190033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,3072,0.013050666451454163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,8192,0.026081067323684693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,3072,0.020106667280197145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,2560,0.011293866237004598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,7168,128,0.004840533435344696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,2560,0.019654399156570433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,7168,0.022818134228388468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,2560,0.011356799801190694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,2048,0.010069333513577779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,6144,0.020002132654190062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,2048,0.018422400951385497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,1536,0.008229333162307739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,1536,0.01620586713155111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,5120,0.01713386575380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,1024,0.006481066842873891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,1024,0.0157258669535319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,1024,0.006862933437029521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,4096,0.015449600418408713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,768,0.005412266651789347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,768,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,512,0.003818666686614355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,512,0.014845866958300272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,256,0.0033141332368055976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,256,0.01471466620763143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,256,0.004954666892687479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,128,0.0030762667457262674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,128,0.014523733655611673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,64,0.0029077333708604175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,64,0.014563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,3072,0.012637866536776224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,6144,32,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,6144,32,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,2048,0.009498666723569233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,65536,0.1719146728515625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,65536,0.10650880336761474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,1536,0.008405333757400513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,16384,0.051413333415985106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,16384,0.03919680118560791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,12288,0.03641066551208496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,12288,0.03206613262494405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,768,0.006133333345254262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,10240,0.03158719937006633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,10240,0.02959679961204529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,512,0.005674666663010915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,8192,0.026310400168100996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,8192,0.026315732796986895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,8192,0.024276266495386757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,7168,0.021011199553807577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,7168,0.02448106606801351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,6144,128,0.004661333560943603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,6144,0.018696532646814982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,6144,0.023306665817896526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,5120,0.016402133305867515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,5120,0.021945599714914957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,4096,0.014094932874043783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,65536,0.16377065976460775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,4096,0.020784000555674233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,16384,0.04583253463109334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,3584,0.012547199924786886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,3584,0.019962666432062785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,12288,0.03291306694348653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,3072,0.011002666751543681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,3072,0.019441066185633342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,2560,0.009878399968147277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,10240,0.02885226607322693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,2560,0.018557866414388023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,2560,0.010920533537864685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,2048,0.008551466464996337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,2048,0.017768534024556477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,2048,0.00944106678167979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,1536,0.007275733351707459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,7168,0.0215445339679718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,1536,0.015920000274976094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,1024,0.005829333265622457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,1024,0.015548800428708395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,6144,0.018729599316914876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,768,0.004285866518815359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,768,0.015399466951688132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,512,0.0037205333511034647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,5120,0.016497066617012023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,512,0.014691199858983359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,256,0.0032970666885375976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,4096,0.014626133441925048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,256,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,128,0.003050666550795237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,3584,0.01320319970448812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,128,0.014451199769973755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,3072,0.012084266543388367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,64,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,64,0.014532267053922018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,5120,32,0.0029845332105954488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,5120,32,0.014446933070818582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,65536,0.1345962683359782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,65536,0.08582080205281575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,16384,0.03739200035730998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,1536,0.007904000083605449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,16384,0.03422293265660604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,1024,0.006772266825040181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,12288,0.029397332668304445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,12288,0.02951573332150777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,768,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,12288,0.03054186701774597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,10240,0.025751467545827227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,10240,0.027165865898132323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,512,0.005497600138187409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,8192,0.022541866699854533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,8192,0.02488320072491964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,7168,0.019631999731063842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,256,0.004885333279768625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,7168,0.023513599236806234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,7168,0.02027413249015808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,6144,0.01727786660194397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,6144,0.02224640051523844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,5120,128,0.004705066482226053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,5120,0.015397333105405173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,5120,0.020508799950281778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,4096,0.013066666324933371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,4096,0.019781333208084107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,65536,0.1435808022816976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,3584,0.012875733772913614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,16384,0.042632532119750974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,3584,0.019172267119089762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,3072,0.010824533303578694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,3072,0.01858560045560201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,10240,0.02648319999376933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,2560,0.008462933699289958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,8192,0.022856533527374268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,2560,0.018447999159495035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,2560,0.010227200388908387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,2048,0.00767146646976471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,2048,0.016040533781051636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,6144,0.018283732732137046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,1536,0.006647466619809468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,1536,0.015759999553362526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,5120,0.01594986617565155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,1024,0.004829866687456766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,1024,0.015286399920781454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,768,0.004038399954636892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,768,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,4096,0.013714133699735006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,512,0.003605333218971888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,512,0.014745600024859109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,256,0.003293866664171219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,3584,0.012612266341845193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,256,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,128,0.003011200080315272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,128,0.014299733440081277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,3072,0.011700266599655151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,64,0.0028234665592511495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,64,0.014341333508491516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,4096,32,0.002922666569550832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,2048,0.008876799543698629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,4096,32,0.014619732896486918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,1536,0.007356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,65536,0.12244053681691487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,65536,0.08031786282857259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,16384,0.03514026800791423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,16384,0.031699200471242264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,1024,0.006457599997520447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,12288,0.026873600482940675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,768,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,12288,0.027512532472610474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,10240,0.023115734259287514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,512,0.005438933273156484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,10240,0.025124265750249224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,8192,0.01990293264389038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,8192,0.023436800638834635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,8192,0.02063680092493693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,256,0.005041066805521647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,7168,0.017944532632827758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,7168,0.02244053284327189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,7168,0.018963199853897095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,6144,0.01593280037244161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,4096,128,0.004637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,6144,0.021410133441289267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,5120,0.014086400469144186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,5120,0.020137600104014077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,4096,0.01222933332125346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,4096,0.019045333067576088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,4096,0.012846933801968894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,3584,0.011411199967066448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,3584,0.019113600254058838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,3072,0.010310399532318115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,65536,0.12587626775105792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,3072,0.018668800592422485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,16384,0.03915626605351766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,2560,0.008288000027338665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,2560,0.017138133446375527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,12288,0.028104533751805622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,2048,0.00716480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,10240,0.024201599756876628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,2048,0.01618346671263377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,1536,0.006377600133419037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,1536,0.015552000204722086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,1024,0.004661333560943603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,1024,0.015102932850519816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,6144,0.016661333044370015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,768,0.003884800026814143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,5120,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,768,0.015691733360290526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,512,0.003602133442958196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,512,0.014779733618100485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,3584,0.011881599823633831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,3072,0.011281067132949829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,256,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,256,0.01442026694615682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,128,0.0029781334102153777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,2560,0.00972160001595815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,2048,0.00846613347530365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,128,0.014500266313552857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,64,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3584,32,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,1536,0.007092266778151194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,64,0.014282666643460593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3584,32,0.014491732915242514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,65536,0.10077439943949382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,65536,0.07109759648640951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,1024,0.00631039987007777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,16384,0.029546666145324706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,16384,0.03062506715456645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,768,0.00572266678015391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,12288,0.02444373369216919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,12288,0.02688746651013692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,10240,0.021746132771174112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,10240,0.024727465709050496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,128,0.004665599763393402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,8192,0.018347734212875368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,8192,0.02288533250490824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,7168,0.016509866714477538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,7168,0.021970132986704506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,65536,0.14656853675842285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,16384,0.04159786701202393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,512,0.0054282665252685545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,6144,0.01541759967803955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,6144,0.020873600244522096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,5120,0.012999467055002847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,5120,0.01976213256518046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,12288,0.03023359974225362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,4096,0.011240532994270325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3584,256,0.0050570666790008545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,4096,0.019035732746124266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,3584,0.010714667042096455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,10240,0.025722666581471758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,3584,0.018557866414388023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,3072,0.009807999928792317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,3072,0.018193066120147705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,2560,0.007761066655317943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,2560,0.016495999693870545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,8192,0.02274880011876424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,2048,0.006995200117429097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,2048,0.016225066781044007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,7168,0.019896533091862997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,2048,0.008215466638406117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,6144,0.017234132687250773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,1536,0.0059232001503308615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,1536,0.016646400094032288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,5120,0.015050666530927024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,1024,0.004227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,1024,0.01569066643714905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,768,0.003933866570393244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,4096,0.01320746640364329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,768,0.014966400464375815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,512,0.0035445332527160645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,512,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,256,0.0031658666829268134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,3072,0.011745066444079081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,256,0.014385066429773965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,128,0.002948266764481862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,2560,0.009711999694506328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,128,0.014321066935857139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,64,0.0028266665836175283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,64,0.014260266224543253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,3072,32,0.00277866671482722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,3072,32,0.014436266819636025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,65536,0.08874773184458415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,1536,0.007339733342329661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,1024,0.0063274666666984555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,65536,0.06408960024515788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,3584,0.01230506698290507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,16384,0.027804799874623615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,16384,0.0275711993376414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,768,0.006041599810123444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,12288,0.02100693384806315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,512,0.005239466826121012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,12288,0.024202666680018105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,12288,0.027617067098617554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,10240,0.018157867590586345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,10240,0.02303253412246704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,256,0.004896000027656555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,8192,0.021707733472188316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,8192,0.01556373337904612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,7168,0.0139765332142512
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,3072,128,0.004605866471926371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,7168,0.020637865861256918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,6144,0.012552533547083536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,6144,0.02008426586786906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,6144,0.015846400459607442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,5120,0.011286399761835734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,5120,0.01923946738243103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,4096,0.010185600320498148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,4096,0.01876266598701477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,4096,0.012289067109425861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,3584,0.00946453313032786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,3584,0.01829013427098592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,65536,0.1323584000269572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,3072,0.008891733487447102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,3072,0.016302933295567833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,2560,0.0074890668193499255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,2560,0.016340266664822897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,10240,0.023363200823465984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,2048,0.0067775999506314594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,8192,0.019783467054367065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,2048,0.016217600305875143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,7168,0.018212266763051353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,1536,0.005264000097910563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,1536,0.015733333428700765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,5120,0.013923199971516928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,1024,0.004277333120505015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,1024,0.015296000242233276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,16384,0.036474665006001786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,768,0.004031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,3584,0.011341866850852967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,768,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,512,0.0036170666416486105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,512,0.014830933014551798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,2560,0.008861866593360902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,256,0.00325546662012736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,2048,0.008020266890525818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,256,0.014218667149543762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,128,0.002919466545184453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,3072,0.010607999563217164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,128,0.014189866185188294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,64,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,64,0.014261333147684732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2560,32,0.0027114666998386385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,1536,0.007485866546630859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2560,32,0.014233600099881491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,65536,0.0690282662709554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,65536,0.05627306699752808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,65536,0.13116587003072103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,1024,0.006000000238418579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,16384,0.02148373325665792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,16384,0.025111466646194458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,12288,0.01738986571629842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,768,0.005614933371543884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,12288,0.027922133604685467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,12288,0.022318933407465616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,10240,0.015175466736157736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,10240,0.021746132771174112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,8192,0.013352533181508383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,8192,0.020742400487263998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,7168,0.012337066729863485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,7168,0.019977599382400513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,256,0.004904533425966898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,6144,0.01153706709543864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,6144,0.019064533710479736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,6144,0.01609813372294108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,5120,0.010214400291442872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,128,0.004540800054868063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,5120,0.019113600254058838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,4096,0.009139200051625569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,4096,0.017358932892481485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,3584,0.008548266688982646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,3584,0.01665493349234263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,16384,0.036858665943145755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,3072,0.008027733365694682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2560,512,0.005287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,3072,0.01676373283068339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,10240,0.02434239983558655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,2560,0.007259733478228251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,2560,0.01662613352139791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,8192,0.020351999998092653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,2048,0.006122666597366333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,7168,0.018273067474365235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,2048,0.01586026648680369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,1536,0.004772266745567322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,1536,0.015643733739852905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,5120,0.01418346663316091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,1024,0.0041354666153589886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,4096,0.01220373312632243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,1024,0.015068800250689188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,768,0.003819733361403147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,768,0.015339733163515726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,512,0.003436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,3584,0.011084799965222675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,512,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,256,0.0031178665657838186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,256,0.014484266440073649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,2560,0.008880000313123066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,128,0.002855466554562251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,128,0.014105600118637086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,2048,0.008875733613967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,64,0.0027327999472618104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,1536,0.00710399995247523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,64,0.014266666769981385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,2048,32,0.002881066749493281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,1024,0.006164266665776571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,2048,32,0.014376533031463624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,65536,0.05613119999567667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,65536,0.0472981333732605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,768,0.005660800139109293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,16384,0.018202666441599527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,16384,0.02314773400624593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,12288,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,512,0.005420800050099691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,12288,0.021128533283869426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,3072,0.010317867000897724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,10240,0.012949333588282267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,10240,0.020574933290481566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,256,0.004974933465321859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,8192,0.011150933305422465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,8192,0.019292799631754558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,8192,0.01737920045852661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,7168,0.01049066682656606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,7168,0.01938986579577128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,6144,0.009426132837931315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,6144,0.01889280080795288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,2048,128,0.004637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,5120,0.009170132875442504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,5120,0.01763520042101542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,65536,0.10564906597137451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,16384,0.0326474666595459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,4096,0.008077866832415263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,4096,0.017174400885899863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,12288,0.022871466477711995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,3584,0.008090666433175405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,3584,0.01750826636950175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,3072,0.00746666689713796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,3072,0.016850133736928306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,3072,0.009483733773231506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,7168,0.01537493367989858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,2560,0.006502399841944377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,2560,0.016552533706029257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,2048,0.005589333176612854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,6144,0.013452800114949546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,2048,0.01620266636212667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,1536,0.004781866570313772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,5120,0.011317333579063416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,1536,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,1024,0.00417493333419164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,1024,0.015211733182271323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,768,0.003738666574160258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,10240,0.019798400004704793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,768,0.01490133305390676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,4096,0.010408533612887065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,512,0.003472000112136205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,3584,0.00960106650988261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,512,0.014784000317255654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,256,0.0029919999341169994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,256,0.01423893372217814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,128,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,128,0.014243200421333313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,2560,0.007986133297284443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,64,0.0027978666126728057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,2048,0.007035733262697856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,64,0.014123732844988504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1536,32,0.0027872001131375628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1536,32,0.014226133624712626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,65536,0.037300264835357665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,65536,0.03837013244628906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,1536,0.006664533416430156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,16384,0.013734400272369385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,16384,0.021013333400090536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,1024,0.005731200178464254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,12288,0.011690666278203327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,12288,0.019373865922292073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,10240,0.010666666428248088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,10240,0.01940586765607198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,512,0.005089066425959269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,10240,0.016597333550453185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,8192,0.009191466371218364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,256,0.00476800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,8192,0.018630399306615194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,7168,0.008525866270065307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,7168,0.018101332585016887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,6144,0.00781333347161611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,128,0.0045738667249679565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,6144,0.01763733426729838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,5120,0.007821866869926452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,5120,0.0180896004041036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,5120,0.00981226662794749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,65536,0.088810666402181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,4096,0.006926933427651723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,16384,0.025494400660196943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1536,768,0.005490133166313171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,4096,0.017400532960891724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,3584,0.006915199756622315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,12288,0.018901334206263224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,3584,0.017105066776275636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,3072,0.0062389334042867025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,3072,0.016756266355514526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,2560,0.0060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,8192,0.013476266463597616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,2560,0.01613866686820984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,2048,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,7168,0.012270933389663697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,2048,0.015801599621772765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,1536,0.004772266745567322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,1536,0.015362133582433065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,6144,0.010813867052396137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,1024,0.0040853333969910945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,1024,0.015140266219774882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,1024,0.005308799942334493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,768,0.0036960000793139136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,768,0.0147189329067866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,512,0.003319466610749563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,4096,0.008548266688982646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,512,0.014429866274197897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,3584,0.008117333551247915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,256,0.003033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,256,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,128,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,128,0.014249599973360696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,2560,0.007067733506361644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,2048,0.006467199822266896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,64,0.0027242665489514667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,64,0.014351999759674073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,1024,32,0.002648533384005229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,1536,0.006128000219662985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,1024,32,0.014216533303260804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,65536,0.03102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,65536,0.034695466359456376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,16384,0.010990933577219645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,16384,0.020406399170557657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,3072,0.007692799965540569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,512,0.004858666658401489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,12288,0.009674666325251262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,12288,0.018770132462183634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,256,0.004612266520659129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,10240,0.008990933497746784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,768,0.0050901333491007485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,10240,0.018782933553059898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,10240,0.015436800320943198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,8192,0.008250666658083598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,1024,128,0.004462933540344239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,8192,0.01804693341255188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,7168,0.007895466685295106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,7168,0.01833066741625468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,7168,0.011566932996114094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,6144,0.007169066866238911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,6144,0.017527467012405394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,6144,0.010658133029937743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,5120,0.0072202667593956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,5120,0.01806079943974813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,65536,0.07799466451009116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,4096,0.006279466549555461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,4096,0.017385600010553996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,16384,0.02262826760609945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,3584,0.006607999900976817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,3584,0.01722986698150635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,3072,0.0063391998410224915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,3072,0.016566399733225504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,2560,0.0060917332768440245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,2560,0.01639253298441569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,2560,0.006994133194287618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,2048,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,2048,0.01604159971078237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,8192,0.01288426617781321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,1536,0.004852266609668731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,1536,0.015352533260981242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,1024,0.004093866546948751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,1024,0.014837333559989929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,1024,0.005268266797065735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,5120,0.0095360000928243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,768,0.0036256000399589538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,768,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,512,0.0033482665816942847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,4096,0.00844053328037262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,12288,0.0178656001885732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,512,0.014626133441925048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,256,0.0030794667700926462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,256,0.014301866292953491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,3584,0.007948799928029378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,128,0.002942933390537898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,128,0.014219733079274497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,3072,0.007422933479150136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,64,0.002737066646416982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,64,0.014057599504788718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,768,32,0.0026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,768,32,0.01416000028451284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,65536,0.02253440022468567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,2048,0.006234666705131531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,65536,0.029003733396530153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,1536,0.00591893345117569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,16384,0.009287466605504353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,16384,0.018784000476201376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,12288,0.00813973347345988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,12288,0.018114133675893148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,10240,0.008020266890525818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,768,0.004996266464392344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,10240,0.018407466014226277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,8192,0.0070602665344874065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,512,0.004730666677157084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,8192,0.01804800033569336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,128,0.004465066889921824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,7168,0.006828799843788147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,7168,0.01832853356997172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,6144,0.006146133442719777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,6144,0.01750719944636027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,65536,0.07570666472117106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,5120,0.006534400085608165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,16384,0.022032000621159873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,5120,0.017806933323542277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,5120,0.009479467074076335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,4096,0.0062047998110453285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,12288,0.016899200280507405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,4096,0.01727893352508545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,4096,0.008296533425649007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,3584,0.006482133269309997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,3584,0.017035732666651406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,3072,0.006218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,10240,0.01495253344376882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,768,256,0.004682666560014089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,3072,0.016691199938456216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,2560,0.006075733403364817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,8192,0.012532266974449157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,2560,0.016705065965652466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,2048,0.0054282665252685545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,2048,0.01607039968172709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,7168,0.011653332908948263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,1536,0.0048885335524876915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,1536,0.01574613352616628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,1024,0.004002133260170618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,6144,0.010478933652242024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,1024,0.015155200163523355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,768,0.0036245333651701607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,768,0.015239466230074564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,3584,0.00796693315108617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,512,0.00342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,3072,0.007281066477298736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,512,0.014477866888046264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,512,0.004614399870236715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,256,0.0030400000512599947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,256,0.014290133118629455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,2560,0.006808533271153767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,128,0.0028138667345046996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,2048,0.0061589335401852924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,128,0.014052266875902811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,64,0.0026026666164398193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,64,0.0141184002161026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,512,32,0.002657066782315572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,1024,0.005083733300367991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,512,32,0.01420266628265381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,65536,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,65536,0.025165865818659466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,65536,0.07582933108011881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,16384,0.0069461335738499955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,768,0.005075199902057648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,16384,0.018338133891423546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,12288,0.006400000055631001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,12288,0.017810134092966716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,10240,0.006595199803511302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,12288,0.016787199179331462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,10240,0.018450133005777993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,8192,0.006489600241184235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,256,0.004488533238569895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,8192,0.017837866147359212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,7168,0.006313600142796834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,1536,0.005905066430568695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,6144,0.006004266440868378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,7168,0.01815999945004781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,512,128,0.004433066646258036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,5120,0.006505600114663441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,6144,0.01789439916610718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,5120,0.018278400103251137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,4096,0.006138666470845541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,4096,0.017915733655293784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,3584,0.006358399987220764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,16384,0.02137920061747233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,3584,0.016993065675099693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,3072,0.006031999985376993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,3072,0.016568533579508462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,10240,0.014682666460673014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,2560,0.006097066899140676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,8192,0.012614400188128153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,2560,0.016062933206558227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,7168,0.01167680025100708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,2048,0.005336533486843109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,2048,0.015800533692042033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,6144,0.010468266407648722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,1536,0.0047423998514811196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,1536,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,5120,0.009381332993507385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,1536,0.005781333148479462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,1024,0.004088533421357473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,1024,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,768,0.003718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,768,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,3584,0.007918933530648549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,512,0.003433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,512,0.0144896000623703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,256,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,3072,0.00728000005086263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,256,0.014313600460688271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,128,0.00279573326309522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,128,0.014223999778429666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,2560,0.006819200019041698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,64,0.002700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,2048,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,64,0.014145066340764364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,256,32,0.0026986666023731233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,4096,0.008411733309427898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,256,32,0.014298666516939798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,65536,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,1024,0.0051807999610900875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,65536,0.022749867041905722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,16384,0.0062282666563987735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,16384,0.018846933046976724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,768,0.00489279975493749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,16384,0.021475199858347574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,12288,0.006107733150323232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,12288,0.018057600657145182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,512,0.004704000055789947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,10240,0.006435200075308482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,10240,0.01829119920730591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,8192,0.006291200220584869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,256,0.004422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,8192,0.01760960022608439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,7168,0.006131199995676676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,7168,0.01771519978841146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,256,128,0.004368000229199727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,6144,0.005931733548641205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,6144,0.017829332749048868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,6144,0.010496000448862713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,5120,0.006322133541107178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,5120,0.017805866400400796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,4096,0.005881600081920624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,4096,0.017373865842819212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,65536,0.07447893619537353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,3584,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,3584,0.01688106656074524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,12288,0.016871466239293417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,3072,0.005997866888840993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,10240,0.014724266529083253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,3072,0.016528000434239708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,2560,0.006109866499900818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,2560,0.01595626672108968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,2560,0.006885333359241486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,2048,0.005365333457787832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,7168,0.011562666296958924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,2048,0.015732266505559287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,1536,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,1536,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,5120,0.009458133578300476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,1024,0.0040832000474135075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,1024,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,768,0.003688533355792364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,4096,0.008313600222269695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,768,0.014665599664052328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,768,0.005055999755859375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,512,0.003340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,512,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,3584,0.007999999821186066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,256,0.003070933371782303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,8192,0.012681600451469422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,256,0.014396799604098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,256,0.004531200230121613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,3072,0.0073290665944417315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,128,0.0028351999819278715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,128,0.014311466614405313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,64,0.0026464000344276427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,2048,0.006129066646099091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,64,0.014282666643460593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,128,32,0.0027082666754722597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,1536,0.0057429333527882894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,65536,0.023355732361475624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,128,32,0.014190933108329773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,65536,0.010190932949384054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,16384,0.006066133578618368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,16384,0.01823893388112386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,12288,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,1024,0.00517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,12288,0.018148267269134523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,10240,0.006295466423034668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,10240,0.018225065867106118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,8192,0.006089599927266439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,8192,0.017863466342290243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,7168,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,7168,0.017901867628097534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,512,0.004733866453170777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,6144,0.005783466498057047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,3584,0.006167466441790262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,6144,0.017493333419164023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,5120,0.006145066519578298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,5120,0.018127999703089395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,4096,0.005707733333110809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,4096,0.017845332622528076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,3584,0.016568533579508462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,32,128,128,0.00432533323764801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,3072,0.005791999896367391
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,3072,0.0167413334051768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,2560,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,2560,0.016223999857902526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,2048,0.005295999844868978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,2048,0.015987199544906617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,1536,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,1536,0.015372799833615622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,1024,0.003982933362325033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,1024,0.015082666277885437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,768,0.0036330667634805044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,768,0.014882133404413859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,512,0.0034048000971476236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,512,0.01444586714108785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,256,0.0029898665845394133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,256,0.014286933342615762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,128,0.00288426677385966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,128,0.014310399691263834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,64,0.0026538667579491934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,64,0.014167466759681701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,64,32,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,64,32,0.014251733819643656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,65536,0.008764800429344178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,16384,0.006478933493296306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,65536,0.022338134050369263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,16384,0.018372267484664917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,12288,0.005940266450246175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,12288,0.017742933829625447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,10240,0.006098133325576782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,10240,0.01802560091018677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,8192,0.005992533266544342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,8192,0.017285333077112833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,7168,0.005834666887919108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,7168,0.017831466595331826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,6144,0.005831466615200042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,4096,0.01770240068435669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,6144,0.017338667313257852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,5120,0.006061866879463196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,5120,0.01829866568247477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,4096,0.005636266867319743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,3584,0.00618453323841095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,3072,0.005739733576774597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,3584,0.016872533162434897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,2048,0.01625920037428538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,3072,0.016406400005022685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,2560,0.005926399926344554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,2560,0.01593066652615865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,2048,0.005332266787687937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,1536,0.0046858668327331545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,1024,0.003979733337958654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,1536,0.015542399883270264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,1024,0.014804266889890037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,768,0.003589333345492681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,768,0.01455893317858378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,512,0.0033183999359607695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,512,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,256,0.003045333425203959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,256,0.01420906682809194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,128,0.00278613343834877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,128,0.01416000028451284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,64,0.0025759999950726825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,64,0.014249599973360696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,32,32,32,0.0026421333352724713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,32,32,32,0.014310399691263834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,16384,0.5234784126281739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,16384,0.2775061289469401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,12288,0.21088213920593263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,12288,0.3929365475972494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,10240,0.3281920115152995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,10240,0.18344213167826334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,8192,0.14698665936787922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,8192,0.264516274134318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,7168,0.23133333524068198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,7168,0.13269120057423908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,6144,0.1989162604014079
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,6144,0.11407252947489421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,5120,0.16681067148844403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,5120,0.09720213413238525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,4096,0.13424960772196454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,4096,0.08115306695302328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,16384,0.2561546643575033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,12288,0.19236693382263184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,3584,0.1256661335627238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,3584,0.07251946926116944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,10240,0.1757973353068034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,8192,0.14345067342122395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,3072,0.10159893035888672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,3072,0.07139626344045004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,7168,0.11535360018412273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,2560,0.08513279755910239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,2560,0.05613013505935669
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,6144,0.10196159680684407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,2048,0.06878506342569987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,2048,0.049217065175374344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,1536,0.052634668350219724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,1536,0.04039786656697591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,1024,0.036697598298390706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,1024,0.03208106756210327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,4096,0.068832000096639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,768,0.029659734169642134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,768,0.028065067529678345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,5120,0.09134079615275065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,3072,0.053515732288360596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,512,0.022457599639892578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,3584,0.06142186721165975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,512,0.023478400707244874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,256,0.012505599856376648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,256,0.019413334131240845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,128,0.00738560010989507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,2048,0.03847893476486206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,2560,0.048964265982309976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,128,0.016300800442695617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,1024,0.022426666816075642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,64,0.004726399978001913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,1536,0.03031253417332967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,64,0.01618133286635081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,65536,32,0.004446933170159658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,768,0.018454400698343913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,65536,32,0.016341333587964378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,16384,0.12402026653289795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,65536,0.5319274584452311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,65536,0.27205120722452797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,16384,0.0772490660349528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,12288,0.10642346541086833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,12288,0.06808853149414062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,10240,0.07960960070292154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,10240,0.05442453225453695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,256,0.011260799566904704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,8192,0.06415040095647176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,128,0.009065600236256917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,8192,0.046792534987131755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,7168,0.05673280159632364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,7168,0.04257813294728597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,6144,0.049214935302734374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,6144,0.039027198155721025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,65536,0.25961386362711586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,16384,0.06902293364206949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,65536,512,0.014969600240389505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,5120,0.04676906665166219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,5120,0.035069866975148516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,12288,0.05315626859664917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,4096,0.034968535105387374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,4096,0.031170133749643964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,3584,0.030631466706593828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,3584,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,3072,0.026686932643254595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,3072,0.02717333237330119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,10240,0.04546133279800415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,2560,0.023118933041890465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,2560,0.025149865945180254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,8192,0.038507731755574544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,2048,0.0195413331190745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,2048,0.02362346649169922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,7168,0.03361920118331909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,1536,0.015217066804567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,1536,0.02063466707865397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,6144,0.02951786716779073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,1024,0.011012267072995503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,1024,0.01879253387451172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,4096,0.021691733598709108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,768,0.008931199709574383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,768,0.017848533391952515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,3072,0.01807039976119995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,3584,0.01994133392969767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,512,0.006938666601975759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,512,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,256,0.004082133372624716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,256,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,2560,0.016005333264668783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,2048,0.0136245330174764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,128,0.0034933333595593774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,128,0.014788267016410828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,64,0.003190399954716364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,64,0.014828800161679586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,16384,32,0.0032821332414944967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,1536,0.011476266384124755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,16384,32,0.014867200454076131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,1024,0.00920853316783905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,65536,0.21944853464762368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,65536,0.39237867991129555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,5120,0.025885866085688276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,16384,0.10334080060323078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,16384,0.07142293453216553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,12288,0.07915200392405192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,12288,0.053447465101877846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,10240,0.065174400806427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,10240,0.047150933742523195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,768,0.008226133386294047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,512,0.005885866781075796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,8192,0.053707734743754065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,8192,0.04074133237202962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,256,0.005294933418432872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,7168,0.04761279821395874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,7168,0.037213865915934244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,16384,128,0.004851200183232625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,6144,0.041145598888397215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,6144,0.034430932998657224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,5120,0.03498880068461101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,5120,0.031129600604375203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,65536,0.20676159858703613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,4096,0.03101759950319926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,4096,0.02791573405265808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,12288,0.04405973354975383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,3584,0.025421865781148273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,3584,0.02629973292350769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,8192,0.030690133571624756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,3072,0.022115200757980347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,3072,0.024569600820541382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,7168,0.027422932783762614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,2560,0.019050665696461997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,2560,0.022746666272481283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,6144,0.024873600403467814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,2048,0.016090666254361473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,2048,0.02102933327356974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,5120,0.021639466285705566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,1536,0.012651733557383218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,1536,0.01948053240776062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,16384,0.056909867127736415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,1024,0.00941546658674876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,1024,0.017812265952428182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,10240,0.037291733423868816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,4096,0.018462934096654258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,768,0.00856213370958964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,768,0.01687893271446228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,3584,0.016692266861597697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,3072,0.015196800231933594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,512,0.006230400005976359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,512,0.01513706644376119
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,256,0.0037813333173592886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,256,0.01477120021979014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,2048,0.011521066228548687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,2560,0.013397333025932313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,128,0.003336533407370249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,128,0.01467626690864563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,64,0.0029685333371162414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,1536,0.01013973355293274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,64,0.014626133441925048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,12288,32,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,12288,32,0.014367999633153281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,65536,0.3386165301005045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,65536,0.18838613828023273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,16384,0.10160533587137859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,16384,0.06071786483128866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,12288,0.06739093462626139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,12288,0.04602133433024089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,512,0.005588266750176748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,256,0.005073066552480062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,10240,0.057239464918772374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,10240,0.04111466805140178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,128,0.00480320006608963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,8192,0.047185067335764566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,8192,0.035997867584228516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,1024,0.008392533659934998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,7168,0.038447999954223634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,12288,768,0.006772266825040181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,7168,0.03338666756947835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,6144,0.033514666557312014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,6144,0.03078293402989705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,5120,0.02881386677424113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,5120,0.028678399324417115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,65536,0.17704854011535645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,4096,0.025605332851409913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,16384,0.04787946542104085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,4096,0.025054933627446492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,3584,0.021268266439437866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,3584,0.023681066433588662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,12288,0.03737279971440633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,3072,0.0184608002503713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,3072,0.02239039937655131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,10240,0.0312991996606191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,2560,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,8192,0.026565333207448322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,2560,0.021010132630666097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,2048,0.013083733121554055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,7168,0.023864533503850302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,2048,0.01986133257548014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,1536,0.010420266787211101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,1536,0.018521600961685182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,6144,0.021600000063578286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,1024,0.008267733454704284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,1024,0.017233065764109292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,4096,0.0168778657913208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,768,0.0069354668259620665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,768,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,3584,0.014884266257286071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,3072,0.013347199559211731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,512,0.005810133119424184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,512,0.015032533804575601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,256,0.003432533393303553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,256,0.014699733257293702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,2048,0.010506666700045268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,5120,0.019351466496785482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,128,0.00322026660044988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,2560,0.012121599912643433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,128,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,1024,0.007897600034872691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,64,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,1536,0.009343999624252319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,64,0.014806399742762247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,10240,32,0.0030271999537944795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,10240,32,0.014641066392262777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,768,0.00591893345117569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,65536,0.2681045214335124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,65536,0.14735466639200848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,512,0.005422933399677277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,16384,0.06663146813710531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,16384,0.04736106793085734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,12288,0.05168746709823609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,256,0.005150933563709259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,12288,0.039046398798624676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,10240,0.04201066493988037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,10240,128,0.004657066861788432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,10240,0.03535786469777425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,8192,0.03423893451690674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,8192,0.031675734122594196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,7168,0.030612266063690184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,7168,0.029710932572682695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,6144,0.026804266373316447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,6144,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,5120,0.02305493354797363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,65536,0.14268693923950196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,5120,0.024919466177622477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,16384,0.04188266595204671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,4096,0.019364267587661743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,4096,0.0228928009668986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,12288,0.031154133876164752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,10240,0.027109332879384357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,3584,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,3584,0.022142932812372843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,8192,0.022760534286499025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,3072,0.0151829332113266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,3072,0.02125119964281718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,7168,0.02059733271598816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,2560,0.013218133648236593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,2560,0.019987199703852335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,2048,0.01097813347975413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,5120,0.017128533124923705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,2048,0.01876586675643921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,1536,0.009150933225949604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,1536,0.017808000246683754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,4096,0.013428266843159994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,1024,0.007017600039641063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,1024,0.016101333498954772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,768,0.006196266909440359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,3584,0.012821333607037863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,6144,0.018399999539057414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,3072,0.011574400464693706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,768,0.015620266397794088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,512,0.004072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,512,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,256,0.003570133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,256,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,2560,0.010774399836858113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,128,0.0032042667269706728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,2048,0.009532800316810608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,128,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,64,0.0031690667072931922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,64,0.014498133460680643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,8192,32,0.003068800022204717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,8192,32,0.014525866508483887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,1536,0.008716799815495809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,65536,0.2399445374806722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,65536,0.13378987312316895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,1024,0.006559999783833821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,16384,0.07012693087259927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,16384,0.04627946615219116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,12288,0.04663360118865967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,12288,0.038523733615875244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,768,0.005668266614278158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,512,0.00522986650466919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,10240,0.04050026734670003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,10240,0.03461653391520182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,8192,0.033532798290252686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,8192,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,128,0.004802133142948151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,7168,0.029808000723520918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,7168,0.027818665901819868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,6144,0.026279467344284057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,6144,0.02581653396288554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,8192,256,0.005241600175698599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,65536,0.12685973644256593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,5120,0.021170133352279664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,16384,0.036421334743499754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,5120,0.024238934119542442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,4096,0.017838933070500693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,4096,0.022341332832972207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,3584,0.01592639982700348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,12288,0.02840533256530762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,3584,0.021717333793640138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,10240,0.024165334304173787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,3072,0.01402773360411326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,8192,0.020897066593170165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,3072,0.020377600193023683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,2560,0.012171733379364013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,7168,0.018911999464035035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,2560,0.01947306593259176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,6144,0.017051732540130614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,2048,0.010272000233332317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,2048,0.0185098667939504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,1536,0.008667733271916707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,1536,0.01746986707051595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,1024,0.006737066805362702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,4096,0.01279039978981018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,1024,0.01546239952246348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,768,0.005838933090368906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,3584,0.011959466338157653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,3072,0.010945066809654236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,768,0.014967466394106546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,512,0.003774933268626531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,512,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,2560,0.01025493343671163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,256,0.0033930666744709016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,5120,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,2048,0.008923733234405517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,256,0.014849066734313965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,128,0.003230933348337809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,128,0.014292266964912415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,1536,0.008231466511885326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,1024,0.006198399762312571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,768,0.0058218667904535925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,64,0.00297173336148262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,64,0.014242133498191834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,7168,32,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,512,0.005453866720199585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,7168,32,0.01439466675122579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,65536,0.19278292655944823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,256,0.005061333378156027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,65536,0.1137504021326701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,7168,128,0.004818133513132731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,16384,0.05271039803822836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,16384,0.0407968004544576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,12288,0.04246079921722412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,12288,0.033642665545145674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,10240,0.03707306782404582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,10240,0.03137386639912923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,65536,0.11318933169047038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,8192,0.028130133946736652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,16384,0.03348906834920247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,8192,0.028264532486597698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,6144,0.02210986614227295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,7168,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,12288,0.0257749338944753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,7168,0.026602667570114136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,10240,0.022657066583633423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,6144,0.024024534225463866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,8192,0.019332265853881835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,5120,0.019076265891393027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,5120,0.022997333606084188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,7168,0.01700053413709005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,4096,0.016311466693878174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,4096,0.021284266312917074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,6144,0.015559466679890952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,3584,0.014164266983668008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,3584,0.020822399854660036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,5120,0.013895466923713684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,3072,0.012638933459917703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,3072,0.019621332486470543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,4096,0.011860266327857971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,2560,0.011132799585660299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,2560,0.018982400496800743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,3584,0.011300266782442728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,2048,0.009816533327102661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,2048,0.0180842657883962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,3072,0.010216533144315084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,1536,0.008145066599051159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,1536,0.01649066706498464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,2560,0.009611733754475911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,2048,0.008648533622423809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,1024,0.006443733473618825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,1024,0.0150709331035614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,768,0.005357866485913595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,1536,0.007589333256085714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,768,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,1024,0.005937066674232483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,512,0.003815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,512,0.014877866705258688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,256,0.0032287999987602235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,768,0.005422933399677277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,256,0.014523733655611673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,128,0.003033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,128,0.0142794668674469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,64,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,256,0.004940799872080485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,64,0.014371200402577718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,512,0.005397333204746247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,6144,32,0.0028949332733949023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,6144,32,0.014411733547846476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,65536,0.16794880231221515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,65536,0.10157546997070313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,16384,0.04879039923350016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,16384,0.03768853346506755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,12288,0.03778133392333984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,12288,0.03268479903539022
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,10240,0.03066986600557963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,10240,0.028622933228810626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,8192,0.025740800301233928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,8192,0.026106667518615723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,7168,0.020917334159215293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,7168,0.02411839962005615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,6144,0.01856106718381246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,6144,128,0.004661333560943603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,6144,0.02243306636810303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,5120,0.016192000110944113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,5120,0.021410133441289267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,65536,0.1004863977432251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,16384,0.02869759996732076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,4096,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,4096,0.02005866765975952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,12288,0.024261333545049033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,3584,0.011981866757074992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,3584,0.01956160068511963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,10240,0.02081706722577413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,3072,0.010826667149861652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,3072,0.01919999917348226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,2560,0.009718400239944459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,7168,0.016030933459599814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,2560,0.01843199928601583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,2048,0.00837546686331431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,2048,0.017890133460362754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,6144,0.014190933108329773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,1536,0.007353599866231282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,5120,0.012604799866676331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,1536,0.015921066204706825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,1024,0.005852800110975901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,8192,0.017744000752766928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,4096,0.011204266548156738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,1024,0.01600320041179657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,768,0.004424533247947693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,768,0.01520746648311615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,3072,0.009780266880989074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,512,0.0035968000690142312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,512,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,3584,0.010849066575368245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,256,0.0031466667850812277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,256,0.01444906691710154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,128,0.003033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,128,0.014424533645311991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,2048,0.008286933104197185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,2560,0.00925973355770111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,64,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,64,0.014292266964912415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,5120,32,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,5120,32,0.01439786652723948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,1536,0.0069365332523981735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,65536,0.13293013572692872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,65536,0.08158186276753744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,16384,0.03997546831766764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,16384,0.03336533308029175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,12288,0.028474666674931842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,12288,0.028431999683380126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,1024,0.00595306654771169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,10240,0.02625280022621155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,512,0.005306666592756907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,256,0.004991999765237173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,10240,0.026578134298324584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,128,0.004740266501903534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,8192,0.021663999557495116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,8192,0.02431679964065552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,7168,0.01973653237024943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,7168,0.023028266429901124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,6144,0.01711146632830302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,6144,0.022100265820821127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,16384,0.027080533901850383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,5120,0.014921599626541137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,5120,0.0204693337281545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,12288,0.021945599714914957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,65536,0.08925013542175293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,4096,0.012797866264979044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,4096,0.019330133994420372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,3584,0.011817600329717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,5120,768,0.005682133138179779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,3584,0.019106133778889974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,3072,0.010665599505106609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,3072,0.0184714674949646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,10240,0.01904319922129313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,2560,0.008481066425641377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,8192,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,2560,0.01764586567878723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,2048,0.007677866518497467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,7168,0.014745600024859109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,2048,0.01665386656920115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,1536,0.006577066580454509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,1536,0.015785599748293556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,6144,0.013622400164604188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,1024,0.0044608001907666525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,1024,0.01527253290017446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,5120,0.012116266290346782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,768,0.003868799904982249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,768,0.014818132917086283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,4096,0.01086186667283376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,512,0.0036917333801587426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,512,0.014750933647155762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,3072,0.009426132837931315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,256,0.003257599969704946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,256,0.014371200402577718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,3584,0.010482133428255717
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,128,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,128,0.01434346636136373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,2048,0.007447466750939687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,64,0.00275093341867129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,64,0.014334932963053385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,2560,0.008910933136940002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,4096,32,0.0028351999819278715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,1024,0.005717333157857259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,4096,32,0.014450132846832275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,1536,0.006723199784755707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,65536,0.12004799842834472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,65536,0.07623679637908935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,16384,0.03439146677652995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,16384,0.031454932689666745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,768,0.0054175997773806255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,12288,0.026210133234659833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,12288,0.026257065931955974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,10240,0.022686932484308878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,256,0.004874666531880697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,10240,0.024939733743667602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,8192,0.019000534216562906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,8192,0.02301973303159078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,7168,0.01718506614367167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,7168,0.02188053329785665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,128,0.004726399978001913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,6144,0.015384533007939658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,6144,0.021285333236058555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,4096,512,0.005236266553401947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,65536,0.08721813360850016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,5120,0.014158933361371358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,16384,0.026563199361165364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,5120,0.02031360069910685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,4096,0.012013866504033407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,4096,0.019035732746124266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,12288,0.021296000480651854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,3584,0.011079466342926026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,10240,0.018658133347829182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,3584,0.01869759956995646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,8192,0.015701333681742348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,3072,0.010086400310198466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,3072,0.018222934007644652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,2560,0.008147199948628742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,7168,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,2560,0.0168938676516215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,2048,0.007210666437943776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,2048,0.016035200158754984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,1536,0.0063498665889104204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,6144,0.013377066453297934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,1536,0.015558399756749473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,1024,0.004487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,1024,0.01514240006605784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,5120,0.01218239963054657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,768,0.0038624001046021783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,768,0.015161599715550741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,4096,0.01055680016676585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,512,0.003505066782236099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,512,0.014969600240389505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,3584,0.010290132959683736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,3072,0.00938986639181773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,256,0.0031466667850812277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,256,0.014450132846832275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,2048,0.007197866837183635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,128,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,128,0.014251733819643656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,2560,0.008566400408744812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,64,0.0027434666951497394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,64,0.014167466759681701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3584,32,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,1024,0.006006399790445963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,1536,0.0066549330949783325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3584,32,0.0143477330605189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,65536,0.10501653353373211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,16384,0.03025706609090169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,65536,0.06753173669179281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,16384,0.028946133454640706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,12288,0.02390186587969462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,12288,0.025618133942286174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,10240,0.020925867557525634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,10240,0.023561600844065347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,768,0.00559146652619044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,8192,0.01766080061594645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,8192,0.022541866699854533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,512,0.005269333223501841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,7168,0.015931733449300132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,7168,0.02225386699040731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,256,0.004996266464392344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3584,128,0.004827733337879181
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,6144,0.014294399817784628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,6144,0.020745599269866945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,5120,0.01274133324623108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,5120,0.019853866100311278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,65536,0.08692373434702555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,4096,0.01087679962317149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,16384,0.025870933135350542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,4096,0.019029333194096883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,12288,0.021010132630666097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,3584,0.010051199793815612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,3584,0.018170666694641114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,3072,0.00956053336461385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,10240,0.018245333433151247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,3072,0.01751040021578471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,2560,0.00782719999551773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,8192,0.015576533476511636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,2560,0.01616426706314087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,2048,0.006899199883143107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,2048,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,7168,0.014338133732477823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,1536,0.005936000247796377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,1536,0.01569386621316274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,6144,0.013090133666992188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,1024,0.00421973317861557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,5120,0.01188800036907196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,1024,0.0151936004559199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,768,0.0039327998956044516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,768,0.014987732966740927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,4096,0.01058240036169688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,512,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,512,0.014693333705266317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,3072,0.009016533692677815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,256,0.0030901332696278887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,3584,0.010233599940935771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,256,0.014398933450380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,2048,0.007055999835332234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,128,0.002792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,128,0.014227199554443359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,2560,0.008523733417193095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,64,0.0027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,64,0.014401066303253173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,3072,32,0.0027669332921504974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,3072,32,0.01446293294429779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,65536,0.08684693177541097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,65536,0.06010773181915283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,1536,0.006588799754778545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,1024,0.005789866546789805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,16384,0.026074665784835815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,16384,0.026748800277709962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,768,0.005448533097902933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,12288,0.02095466653505961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,12288,0.023808000485102336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,10240,0.017953066031138103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,10240,0.02236479918162028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,8192,0.015158399939537048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,512,0.005148800214131674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,8192,0.02109973430633545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,256,0.005061333378156027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,7168,0.013693867127100625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,7168,0.02053546706835429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,3072,128,0.004737066725889841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,6144,0.01237546702226003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,6144,0.01925333340962728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,5120,0.011084799965222675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,5120,0.019338667392730713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,65536,0.08629653453826905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,4096,0.010003200173377991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,4096,0.018722132841746012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,16384,0.02572373350461324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,12288,0.0206058661142985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,3584,0.00918506681919098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,3584,0.017748266458511353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,3072,0.008737066388130188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,3072,0.016390400131543477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,8192,0.015473066767056783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,2560,0.0074442664782206226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,2560,0.016327466567357382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,7168,0.014342400431632995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,2048,0.006651733318964641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,6144,0.013037866353988648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,2048,0.015836800138155617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,1536,0.005020800232887268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,10240,0.017959467569986978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,1536,0.015609600146611533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,5120,0.011717333396275838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,1024,0.004175999760627746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,1024,0.015117866794268289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,768,0.0037471999724706015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,768,0.015049599607785544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,4096,0.010397866368293762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,512,0.0035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,512,0.014694399634997048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,3072,0.00848640004793803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,3584,0.009821866949399311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,256,0.0031626666585604347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,256,0.014325333635012307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,128,0.0028223998844623564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,128,0.014477866888046264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,2048,0.00695360004901886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,64,0.0027114666998386385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,2560,0.007869866490364075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,64,0.014290133118629455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,1536,0.006609066824118296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2560,32,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,1024,0.005880533158779145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2560,32,0.014422399799029031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,768,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,65536,0.0671295960744222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,65536,0.051336534818013514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,16384,0.021166932582855225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,16384,0.02440746625264486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,12288,0.016847999890645345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,12288,0.021720532576243082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,10240,0.015568000078201295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,10240,0.020910932620366415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,8192,0.012989866733551025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,512,0.005256533126036326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,8192,0.019719467560450236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,256,0.0049002667268117275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,7168,0.011769599715868632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,7168,0.019620267550150554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2560,128,0.004744533201058706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,6144,0.011148800452550251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,6144,0.01919893423716227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,5120,0.010250666737556457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,5120,0.01918720006942749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,65536,0.08510293165842692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,4096,0.008969600001970928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,4096,0.017516799767812095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,16384,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,12288,0.020283732811609903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,3584,0.008404266834259034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,3584,0.017416532834370932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,10240,0.017733333508173625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,3072,0.007896533111731212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,8192,0.015335466464360556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,3072,0.01641813317934672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,2560,0.007366399963696797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,2560,0.016247466206550598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,7168,0.01407360037167867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,2048,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,2048,0.0158869336048762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,6144,0.012854400277137756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,1536,0.004874666531880697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,1536,0.015607466300328573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,5120,0.011373866597811382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,1024,0.004123733441034953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,1024,0.014866133530934652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,768,0.0037717332442601522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,4096,0.00992746651172638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,768,0.01488746702671051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,512,0.0033930666744709016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,3072,0.008214400211970011
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,512,0.01458560029665629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,3584,0.009225599964459737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,256,0.0030805334448814393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,2048,0.006884266436100006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,2560,0.007893333335717519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,256,0.014249599973360696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,128,0.002882133424282074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,128,0.014144000411033631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,1024,0.0057205334305763245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,64,0.002674133330583572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,1536,0.0065194666385650635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,64,0.014102400342623393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,2048,32,0.0026890667776266735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,2048,32,0.014254933595657349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,65536,0.05453653335571289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,65536,0.04423146645228068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,16384,0.017299199104309083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,16384,0.022337067127227783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,12288,0.013801599542299906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,12288,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,10240,0.012587733070055642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,768,0.005362133185068766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,10240,0.019678932428359986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,8192,0.010959999759991963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,8192,0.01976213256518046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,512,0.005126399795214335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,7168,0.010200533270835876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,7168,0.01885226567586263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,256,0.004789333542188009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,6144,0.009381332993507385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,6144,0.018314667542775474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,2048,128,0.004642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,5120,0.008750933408737182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,5120,0.018441599607467652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,65536,0.08554560343424479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,4096,0.007996800045172375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,16384,0.025146667162577314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,4096,0.017385600010553996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,12288,0.020091732343037925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,3584,0.007961600025494894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,3584,0.01686186591784159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,3072,0.007400533556938172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,10240,0.01766293247540792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,3072,0.016330666343371072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,2560,0.006433066725730896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,8192,0.015203199783960977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,2560,0.016074666380882265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,7168,0.013886933525403341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,2048,0.005498666564623515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,6144,0.012583466370900473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,2048,0.01574613352616628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,1536,0.004936533172925314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,5120,0.011012267072995503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,1536,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,1024,0.004054400076468786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,1024,0.015185067057609558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,768,0.0036757332583268487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,768,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,4096,0.00942080020904541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,512,0.0034314667185147605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,512,0.0145578662554423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,3584,0.00904960036277771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,3072,0.008130133152008057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,256,0.003054933249950409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,256,0.014387200276056925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,128,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,128,0.0141567995150884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,2560,0.0076906666159629825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,64,0.0027200000981489818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,2048,0.006820266445477803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,64,0.01418880025545756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1536,32,0.0027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1536,32,0.014114133516947427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,65536,0.036892799536387126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,1024,0.005760000149408976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,65536,0.03514240185419719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,16384,0.01344000001748403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,768,0.005342933535575867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,16384,0.02070186734199524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,1536,0.006562133133411407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,12288,0.011389866471290588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,12288,0.018770132462183634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,10240,0.010549333691596986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,10240,0.018772266308466592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,8192,0.009261866410573322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,512,0.005208533505598704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,8192,0.017682133118311565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,256,0.00491946687301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,7168,0.008502399921417237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1536,128,0.00477866679430008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,7168,0.017798399925231932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,6144,0.007770666480064392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,6144,0.017787732680638633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,65536,0.08480213483174642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,5120,0.007855999966462452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,5120,0.01794346570968628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,16384,0.02501973311106364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,12288,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,4096,0.00664106657107671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,4096,0.017400532960891724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,10240,0.01756160060564677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,3584,0.006856533388296763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,3584,0.01707520087560018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,8192,0.014591999848683677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,3072,0.006311466793219249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,3072,0.016424533724784852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,2560,0.006134399771690368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,7168,0.013026133179664612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,2560,0.0161461333433787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,2048,0.005418666700522105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,2048,0.015707733233769734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,1536,0.00476800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,1536,0.015359999736150107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,6144,0.011938132842381795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,1024,0.004102399945259095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,1024,0.014774399995803832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,5120,0.010663466652234395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,768,0.003718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,768,0.014748799800872802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,4096,0.009337600072224934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,512,0.003442133218050003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,512,0.014499200383822122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,3584,0.008917333682378133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,3072,0.00807360013326009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,256,0.0030901332696278887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,256,0.014401066303253173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,128,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,2048,0.006930133203665416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,128,0.014206932981808982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,64,0.002762666592995326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,2560,0.007629866898059845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,64,0.01405333379904429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,1024,32,0.002661333233118057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,1536,0.006487466891606649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,1024,32,0.014137599865595499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,65536,0.031221334139506025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,1024,0.005774933099746704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,65536,0.031540266672770184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,16384,0.01097706655661265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,16384,0.019411200284957887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,12288,0.009379200140635173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,12288,0.018053332964579262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,10240,0.008922666311264038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,10240,0.018905599912007652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,768,0.005426133175690969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,8192,0.008202666540940602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,8192,0.017517866690953572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,512,0.005165866514046987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,7168,0.007650133470694225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,7168,0.018091734250386557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,256,0.004905599852403005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,6144,0.007052800059318543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,1024,128,0.004704000055789947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,6144,0.017593600352605186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,5120,0.006962133447329204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,5120,0.018130133549372353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,65536,0.08537706534067789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,4096,0.006293333570162455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,16384,0.02512213389078776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,4096,0.017474132776260375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,12288,0.01997013290723165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,3584,0.006674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,3584,0.016835200786590575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,3072,0.006232533355553945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,10240,0.017092265685399375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,3072,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,2560,0.006122666597366333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,2560,0.016140799721082053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,8192,0.01470080018043518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,2048,0.005412266651789347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,2048,0.015812266866366068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,7168,0.013116799791653953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,1536,0.004704000055789947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,6144,0.01172693371772766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,1536,0.01525973379611969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,5120,0.010611200332641601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,1024,0.004038399954636892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,1024,0.014990933736165366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,768,0.003748266647259394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,4096,0.00929813285668691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,768,0.01507306694984436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,3072,0.00811413327852885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,512,0.0033503999312718712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,3584,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,2560,0.0076447998483975725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,512,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,2048,0.006874666611353557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,256,0.0030762667457262674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,256,0.014417066176732381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,128,0.0028778667251269023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,1536,0.006550399959087372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,128,0.014147200187047324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,1024,0.005731200178464254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,768,0.005452799797058106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,512,0.005142400165398916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,64,0.0028160000840822858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,256,0.0049098665515581764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,64,0.014290133118629455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,768,32,0.002648533384005229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,768,32,0.014147200187047324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,65536,0.021924267212549843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,65536,0.026209066311518352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,16384,0.009157333771387737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,16384,0.018371200561523436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,12288,0.00819413314263026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,12288,0.017887999614079796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,768,128,0.004695466657479604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,10240,0.007752533257007599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,10240,0.018220800161361694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,8192,0.007029333213965099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,8192,0.017862399419148765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,65536,0.08450773557027182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,7168,0.006674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,16384,0.02443839907646179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,7168,0.017915733655293784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,12288,0.01954879959424337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,6144,0.006246399879455566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,10240,0.016825600465138753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,6144,0.01767146587371826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,8192,0.014219733079274497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,5120,0.006459733347098033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,5120,0.017805866400400796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,7168,0.013092266519864401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,4096,0.006133333345254262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,4096,0.017463467518488564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,6144,0.011808000008265178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,3584,0.006460799773534138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,3584,0.016860800981521606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,3072,0.0061482667922973635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,5120,0.01053546667098999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,3072,0.016382933656374613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,2560,0.006156800190607706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,2560,0.016028799613316855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,2048,0.005452799797058106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,4096,0.009325866897900898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,2048,0.016074666380882265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,1536,0.004730666677157084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,1536,0.015398400028546652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,3072,0.008001066744327545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,2560,0.007732266684373219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,3584,0.008897067109743754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,1024,0.004165333261092504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,1024,0.015004799763361613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,2048,0.006829866766929626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,768,0.003730133424202601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,768,0.014788267016410828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,512,0.0034783999125162757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,1536,0.0064416001240412395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,512,0.014706133802731832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,256,0.003092266619205475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,1024,0.005694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,256,0.014315733313560485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,768,0.005377066632111868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,128,0.0028160000840822858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,512,0.005197866757710775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,128,0.014172800381978354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,64,0.002674133330583572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,64,0.01420266628265381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,512,32,0.002739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,512,32,0.014239999651908874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,128,0.004556799928347269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,65536,0.014177067081133523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,65536,0.023324799537658692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,16384,0.006925866504510244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,512,256,0.005008000135421753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,16384,0.018235733111699425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,12288,0.006234666705131531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,12288,0.01753066579500834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,10240,0.006611200173695882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,10240,0.0178656001885732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,8192,0.00625493327776591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,65536,0.083897598584493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,8192,0.017313067118326822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,7168,0.0062496001521746315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,7168,0.018305067221323648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,16384,0.024344533681869507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,6144,0.0059914668401082356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,12288,0.0191594660282135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,6144,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,10240,0.016635732849438985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,8192,0.014179199934005737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,5120,0.006365866462389629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,7168,0.012941867113113403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,5120,0.01849173307418823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,6144,0.011730133493741354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,5120,0.010574932893117268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,4096,0.006000000238418579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,4096,0.017358932892481485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,3584,0.00643093337615331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,3584,0.01702400048573812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,3072,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,3072,0.016340266664822897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,4096,0.009267200032869976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,2560,0.006037333110968272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,2560,0.0167413334051768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,2048,0.005381333331267038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,2048,0.0157642662525177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,3584,0.008826667070388794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,1536,0.00470719983180364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,1536,0.015187199910481772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,1024,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,1024,0.015009066462516785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,2560,0.007565866907437642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,768,0.003667200108369192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,2048,0.00692799985408783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,3072,0.008147199948628742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,768,0.015092266599337259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,1536,0.006311466793219249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,1024,0.005624533196290334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,512,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,512,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,256,0.002995199958483378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,768,0.0053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,256,0.014383999506632486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,128,0.0027722666660944624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,128,0.014038399855295817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,512,0.005002666513125102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,64,0.0027295999228954316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,64,0.01409280002117157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,256,32,0.0027232001225153605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,256,32,0.014186666409174601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,256,0.004770133395989736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,65536,0.01066986620426178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,65536,0.020062933365503945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,256,128,0.004714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,16384,0.006163200239340464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,16384,0.018304000298182167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,12288,0.005981866518656413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,12288,0.01735360026359558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,10240,0.006190933287143707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,16384,0.023947733640670776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,10240,0.017465599377950034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,65536,0.08452053070068359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,8192,0.006051200131575266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,8192,0.01730239987373352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,12288,0.019228800137837728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,7168,0.005902933577696482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,7168,0.01768959959348043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,6144,0.00581333339214325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,10240,0.016637866695721946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,6144,0.017208532492319743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,5120,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,8192,0.014145066340764364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,5120,0.01796906590461731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,4096,0.005769599974155426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,4096,0.01680533289909363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,3584,0.006183466811974844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,7168,0.012891733646392822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,6144,0.011706667145093282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,3584,0.01660480002562205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,3072,0.0057888001203536986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,3072,0.016042666633923848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,5120,0.01053013304869334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,4096,0.0092031995455424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,2560,0.005836800237496694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,3584,0.008844799796740214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,2560,0.01609599987665812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,3072,0.008021333316961924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,2048,0.005190399785836538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,2048,0.015706666310628257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,1536,0.004664533336957296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,1536,0.01539520025253296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,1024,0.003972266614437103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,2560,0.0074890668193499255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,1024,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,768,0.0037461332976818085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,2048,0.006788266698519389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,1536,0.006321066617965698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,768,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,512,0.003269333392381668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,512,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,1024,0.0055402666330337524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,256,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,256,0.014407466848691305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,128,0.0028031999866167706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,768,0.0053845331072807315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,128,0.013920000195503235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,64,0.0026506667335828146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,512,0.0049792001644770306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,64,0.013921067118644714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,128,32,0.002631466587384542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,256,0.004822400212287903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,128,32,0.014137599865595499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,16,128,128,0.004670933385690053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,65536,0.008449066678682964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,65536,0.019530665874481202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,16384,0.005992533266544342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,16384,0.017861332496007284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,12288,0.005841066439946493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,12288,0.01737173398335775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,10240,0.00600853314002355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,10240,0.017357865969340004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,8192,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,8192,0.017256534099578856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,7168,0.005794133245944977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,7168,0.01758613387743632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,6144,0.005682133138179779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,6144,0.017449599504470826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,5120,0.006016000111897787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,5120,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,4096,0.005688533186912537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,4096,0.01690773367881775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,3584,0.006078933179378509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,3584,0.016433067123095193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,3072,0.005785599847634633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,3072,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,2560,0.005723733206590017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,2560,0.015961600343386333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,2048,0.005106133222579956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,2048,0.01562346617380778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,1536,0.004561066627502441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,1536,0.015014400084813436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,1024,0.0039007999002933502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,1024,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,768,0.0035487999518712364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,768,0.014731733004252115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,512,0.003293866664171219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,512,0.0145578662554423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,256,0.0029909332593282064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,256,0.014447999993960061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,128,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,128,0.014081066846847535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,64,0.002536533276240031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,64,0.014226133624712626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,64,32,0.002661333233118057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,64,32,0.014169599612553915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,65536,0.008522666494051616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,65536,0.019913599888483683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,16384,0.006045866509278615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,8192,0.017284266153971353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,16384,0.017826133966445924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,12288,0.006093866626421611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,12288,0.017181867361068727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,10240,0.006016000111897787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,10240,0.017480534315109254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,8192,0.005942399799823761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,7168,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,7168,0.017922133207321167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,6144,0.005751466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,6144,0.017683200041453042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,5120,0.006044800082842508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,5120,0.018012799819310508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,4096,0.005646933118502299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,4096,0.01707520087560018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,3584,0.006165333092212677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,3584,0.016641066471735636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,3072,0.005727999905745188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,3072,0.016312533617019655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,2560,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,2560,0.015998933712641397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,2048,0.005157333115736643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,2048,0.015845333536465965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,1536,0.004637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,1536,0.0153546671072642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,1024,0.0038111999630928038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,1024,0.0151829332113266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,768,0.0035946667194366455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,768,0.014847999811172486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,512,0.003234133372704188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,512,0.014829867084821067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,256,0.0029450667401154833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,256,0.014493866761525472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,128,0.0026805333793163298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,128,0.013964800039927163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,64,0.0025781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,64,0.014506666858990987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,16,32,32,0.0026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,16,32,32,0.014640000462532044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,16384,0.5220277468363445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,16384,0.2745557467142741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,12288,0.39119253158569334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,12288,0.2095413366953532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,10240,0.32751681009928385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,16384,0.2630197366078695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,10240,0.17821866671244305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,12288,0.19332906405131023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,8192,0.26201705932617186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,8192,0.161407995223999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,10240,0.17566827138264973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,7168,0.23070613543192545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,8192,0.13069653511047363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,7168,0.13179840246836344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,6144,0.19720533688863118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,7168,0.11503679752349853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,6144,0.11303040186564128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,5120,0.1660437266031901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,6144,0.09999893506368002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,5120,0.10793279806772868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,4096,0.13682559331258137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,4096,0.08333120346069336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,5120,0.0888970692952474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,3584,0.11672746340433757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,3584,0.07975573539733886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,3072,0.104584534962972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,4096,0.06856213410695394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,3584,0.06187413136164347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,3072,0.06407680114110312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,2560,0.08937919934590657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,2560,0.05603946844736735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,3072,0.053174400329589845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,2048,0.07805546919504801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,2048,0.0486954649289449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,2560,0.045482667287190755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,1536,0.05214399894078573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,1536,0.04164053201675415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,2048,0.03930346568425496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,1024,0.03652906815210978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,1024,0.032092799743016556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,1536,0.02993599971135457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,768,0.02803093393643697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,768,0.0276256004969279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,512,0.019761067628860474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,1024,0.02207146684328715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,512,0.02276373306910197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,256,0.011107200384140014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,768,0.01834133267402649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,256,0.02000853419303894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,512,0.014563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,128,0.007256533205509186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,128,0.01570026675860087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,256,0.011020800471305848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,64,0.004374399781227112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,64,0.015590399503707886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,65536,32,0.004409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,65536,128,0.008546132842699687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,65536,32,0.016134400169054666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,65536,0.5251765251159668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,65536,0.2696799914042155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,65536,0.25990613301595056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,16384,0.12468586762746174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,16384,0.07667946815490723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,12288,0.09451306660970052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,16384,0.06880853176116944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,12288,0.06313600142796835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,12288,0.05731946627298991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,10240,0.09059413274129233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,10240,0.05899519920349121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,8192,0.06433706680933635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,8192,0.04641706546147664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,10240,0.04688640038172404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,7168,0.05872000058492025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,7168,0.04209386507670085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,7168,0.03319786588350932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,8192,0.037767465909322104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,6144,0.04942506551742554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,6144,0.038753068447113036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,6144,0.02949013312657674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,5120,0.041842134793599446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,5120,0.03580693403879802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,4096,0.03444480101267497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,4096,0.031150933106740313
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,5120,0.0256661335627238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,4096,0.02148266633351644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,3584,0.030228267113367718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,3584,0.0289792001247406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,3072,0.027438932657241823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,3072,0.027009065945943194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,3584,0.01978879968325297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,2560,0.02288960019747416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,2560,0.024598399798075356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,2560,0.015777066349983215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,2048,0.018961066007614137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,2048,0.02264639933904012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,3072,0.017820799350738527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,1536,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,1536,0.020483199755350748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,1024,0.010915199915568035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,2048,0.013355732957522074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,1024,0.018707199891408285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,768,0.008956799904505413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,768,0.017669334014256795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,768,0.008178133269151051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,1536,0.011415466666221619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,512,0.006994133194287618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,512,0.01535360018412272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,1024,0.00944213370482127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,256,0.003985066711902618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,256,0.014891733725865683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,512,0.0059690664211908976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,128,0.0034517332911491393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,128,0.014653866489728292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,64,0.0031136001149813337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,64,0.014748799800872802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,256,0.005264000097910563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,16384,32,0.0031221332649389905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,16384,32,0.014680533607800802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,65536,0.39296000798543296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,65536,0.2097269376118978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,16384,0.09884160359700521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,16384,0.0637279987335205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,16384,128,0.004905599852403005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,12288,0.0757472038269043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,12288,0.05739519993464152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,10240,0.07010560035705567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,10240,0.04721813201904297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,8192,0.0535584012667338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,65536,0.20024107297261556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,8192,0.03986026843388875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,16384,0.05490346749623617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,12288,0.043084800243377686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,7168,0.04760746558507283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,7168,0.03706560134887695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,10240,0.036849065621693926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,6144,0.041157333056132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,6144,0.03392959833145141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,5120,0.034893866380055746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,8192,0.030217599868774415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,5120,0.03075946569442749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,4096,0.028100266059239702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,4096,0.027980800469716387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,7168,0.02884693344434102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,6144,0.02445440093676249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,5120,0.021432532866795858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,3584,0.025272534290949507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,3584,0.026172800858815508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,3072,0.0221834659576416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,3072,0.02455893357594808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,2560,0.01907093326250712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,2560,0.022528000672658286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,2048,0.016065067052841185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,2048,0.02113173405329386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,4096,0.018499199549357095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,1536,0.012793599565823873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,1536,0.01925333340962728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,3584,0.01690773367881775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,1024,0.009256533781687419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,3072,0.014916266997655234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,1024,0.017633066574732462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,768,0.007815466821193695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,2560,0.013297067085901896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,768,0.016455466548601784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,2048,0.011749333143234253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,512,0.006168533364931742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,512,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,256,0.003790933390458425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,256,0.01476800044377645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,128,0.0032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,1536,0.010132267077763876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,128,0.014359466234842935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,64,0.0029887999097506206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,1024,0.008186666667461396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,64,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,12288,32,0.0029898665845394133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,12288,32,0.014513066411018372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,65536,0.3355423927307129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,768,0.007117866476376851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,16384,0.09034773508707682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,65536,0.19011732737223308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,65536,0.17492586771647137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,16384,0.06232106685638428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,12288,0.06843732992808024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,512,0.00551146666208903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,12288,0.04978453318277995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,256,0.005097599824269613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,12288,128,0.004818133513132731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,10240,0.058200534184773764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,16384,0.04810880025227864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,10240,0.040676267941792806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,12288,0.03909013271331787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,8192,0.046586668491363524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,8192,0.03547413349151611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,7168,0.03941226800282796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,7168,0.03311786651611328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,6144,0.03362773259480794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,6144,0.02988160053888957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,10240,0.03224959969520569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,6144,0.02151573300361633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,5120,0.028867199023564653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,5120,0.028058665990829467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,4096,0.022555732727050783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,8192,0.026804266373316447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,4096,0.02480106751124064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,3584,0.02093013326327006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,7168,0.024062933524449666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,3584,0.02370133399963379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,3072,0.018573866287867228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,3072,0.022284799814224245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,5120,0.019040000438690186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,2560,0.015797332922617594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,2560,0.021167999505996703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,4096,0.016306133071581522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,3584,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,2048,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,2048,0.019781333208084107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,1536,0.010710400342941285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,1536,0.01854506731033325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,3072,0.013398399949073792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,1536,0.009276800354321798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,1024,0.008158933122952778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,1024,0.01700693368911743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,768,0.00689386675755183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,768,0.014971733093261719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,2560,0.01225386659304301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,512,0.005654400090376536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,512,0.014990933736165366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,2048,0.010346666971842448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,256,0.003509333233038584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,256,0.014502400159835815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,1024,0.007777066528797149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,128,0.0031136001149813337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,128,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,64,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,64,0.014510933558146158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,768,0.005880533158779145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,10240,32,0.0030752000709374744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,10240,32,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,512,0.005388799806435903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,65536,0.26411946614583337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,65536,0.14408853848775227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,256,0.005077333251635233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,16384,0.06662506659825643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,16384,0.04655253489812215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,10240,128,0.004726399978001913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,12288,0.05857173204421997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,12288,0.040948267777760824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,10240,0.041674665609995526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,10240,0.03540693521499634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,8192,0.035267198085784913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,8192,0.03174933393796285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,7168,0.030364799499511718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,7168,0.029285333553949994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,6144,0.026332799593607587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,6144,0.027195733785629273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,6144,0.018184532721837364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,65536,0.1424789269765218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,5120,0.02313386599222819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,5120,0.025088000297546386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,4096,0.019421867529551187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,4096,0.02287786602973938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,4096,0.0134442667166392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,3584,0.016987733046213784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,3584,0.022100265820821127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,16384,0.04042666753133138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,3072,0.015280000368754067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,3072,0.021092265844345093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,12288,0.030859732627868654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,2560,0.013004799683888754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,2560,0.019800533850987755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,10240,0.026756266752878826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,2048,0.010987733801205952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,2048,0.018572799364725747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,1536,0.009082667032877604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,1536,0.017658666769663493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,7168,0.020278400182723998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,1024,0.0070816000302632645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,5120,0.017001599073410034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,1024,0.014945066968599954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,768,0.006167466441790262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,768,0.014859732985496522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,3584,0.012683733304341634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,768,0.005750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,512,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,3072,0.011293866237004598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,512,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,8192,0.023108265797297158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,2560,0.01055680016676585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,2048,0.009317333499590557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,1536,0.008589866757392883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,256,0.003435733417669932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,256,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,1024,0.006526933113733928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,128,0.0031413334111372627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,128,0.014450132846832275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,64,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,64,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,8192,32,0.0028938665986061097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,8192,32,0.014333867033322654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,65536,0.22913066546122232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,512,0.005593599875768026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,65536,0.135642671585083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,16384,0.06191786527633667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,16384,0.04484479824701945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,12288,0.04631893237431844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,12288,0.03812906742095947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,128,0.004694400231043497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,10240,0.039774934450785324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,10240,0.03411733309427897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,10240,0.023834667603174844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,8192,0.03313493331273397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,8192,0.031117866436640423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,7168,0.029582933584849043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,65536,0.12608426411946613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,7168,0.028408533334732054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,8192,256,0.005121066669623057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,6144,0.02601813276608785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,6144,0.027031467358271284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,5120,0.022719999154408775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,16384,0.03760000069936116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,5120,0.02364906668663025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,12288,0.028114134073257448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,4096,0.021897600094477335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,8192,0.020802134275436403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,4096,0.020035199324289956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,7168,0.018747733036677042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,3584,0.01744746764500936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,3584,0.021172267198562623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,6144,0.01678933302561442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,3072,0.015723733107248943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,3072,0.020137600104014077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,2560,0.012065066893895467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,2560,0.019362133741378785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,5120,0.014883200327555338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,2048,0.010447999835014344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,2048,0.018278400103251137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,1536,0.008643200000127155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,1536,0.017257599035898845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,4096,0.01264959971110026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,1024,0.006660266717274983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,1024,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,768,0.0059125334024429325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,768,0.014793599645296732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,512,0.0036447999378045404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,512,0.014762666821479798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,3072,0.010914132992426554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,256,0.0032320000231266023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,256,0.014542933305104574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,2560,0.010188800096511842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,128,0.003018666555484136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,2048,0.008725333213806152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,128,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,64,0.0028064000109831494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,3584,0.012101333340009053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,1536,0.008032000064849854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,64,0.014239999651908874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,7168,32,0.00296426663796107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,65536,0.19829227129618326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,7168,32,0.014205867052078247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,1024,0.006299733122189839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,768,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,512,0.005383466680844625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,65536,0.11184639930725097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,16384,0.05223679939905802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,256,0.0051146666208903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,16384,0.039920000235239665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,12288,0.04331306616465251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,12288,0.034203732013702394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,7168,128,0.004771199822425842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,10240,0.03509120146433513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,10240,0.0306986669699351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,8192,0.02811093330383301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,65536,0.10692266623179118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,16384,0.03234773278236389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,8192,0.027745066086451213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,12288,0.02568320035934448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,7168,0.02478933334350586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,7168,0.02616426746050517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,7168,0.018071466684341432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,6144,0.021895466248194377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,6144,0.024412800868352257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,5120,0.01890346606572469
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,10240,0.022312533855438233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,5120,0.02327573299407959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,4096,0.016225066781044007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,4096,0.021260799964269002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,3584,0.014016000429789224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,8192,0.019220266739527384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,3584,0.02063039938608805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,3072,0.012571733196576437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,3072,0.01989013353983561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,6144,0.015406933426856995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,2560,0.01120960017045339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,2560,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,2560,0.009542399644851684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,2048,0.009562666217486065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,2048,0.01806933283805847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,1536,0.008105599880218506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,1536,0.016722132762273155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,5120,0.013505066434542337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,1024,0.0064074665307998655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,4096,0.011844266454378765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,1024,0.015128533045450846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,3584,0.011230933666229247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,768,0.00521066685517629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,3072,0.010205866893132527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,768,0.014916266997655234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,512,0.0036138666172822317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,512,0.014570666352907815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,512,0.005268266797065735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,256,0.003222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,256,0.014353066682815552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,1024,0.005785599847634633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,128,0.002942933390537898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,128,0.014354133605957031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,64,0.002883200099070867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,64,0.014252799749374389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,6144,32,0.0028586665789286296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,768,0.005602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,2048,0.008384000261624653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,6144,32,0.014094932874043783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,65536,0.1651594638824463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,65536,0.09997546672821045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,1536,0.007812266548474629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,16384,0.046957866350809736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,16384,0.038427734375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,12288,0.03720213174819946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,12288,0.031169066826502483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,10240,0.03168320059776306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,256,0.005011199911435445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,6144,128,0.004670933385690053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,10240,0.027863466739654542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,65536,0.10004266897837322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,8192,0.026257065931955974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,8192,0.025571199258168538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,16384,0.02895359992980957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,7168,0.02473599910736084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,12288,0.023269333442052207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,7168,0.024066134293874105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,6144,0.018484266599019368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,6144,0.022380799055099487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,6144,0.014039466778437296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,5120,0.01594986617565155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,5120,0.02118720014890035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,4096,0.013719466328620911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,4096,0.01991999944051107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,3584,0.012063999970753986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,3584,0.019297067324320474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,8192,0.01698346734046936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,3072,0.010867200295130412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,7168,0.01580586632092794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,3072,0.01909866730372111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,2560,0.009654399752616883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,10240,0.02050986687342326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,5120,0.01258666714032491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,2560,0.01883626580238342
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,2048,0.00836906631787618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,2048,0.017435733477274576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,4096,0.011168000102043153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,1536,0.007216000060240428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,1536,0.015284267067909241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,3584,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,3072,0.009737599889437358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,1024,0.005690666536490122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,1024,0.015148799618085226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,768,0.004193066557248434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,768,0.014989866813023885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,768,0.00558186670144399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,512,0.0035807999471823373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,2560,0.009110400080680847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,512,0.014613333344459533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,256,0.0032405334214369455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,256,0.014486400286356607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,128,0.0029418667157491045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,2048,0.00819413314263026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,128,0.014200533429781595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,64,0.0027989332874615988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,64,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,5120,32,0.0027903998891512555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,5120,32,0.014122666915257773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,65536,0.13154666423797606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,65536,0.07945813337961832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,16384,0.039691734313964847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,16384,0.03222080071767171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,12288,0.02897706627845764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,1024,0.0058794667323430385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,12288,0.027974400917689008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,10240,0.025124265750249224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,10240,0.025935999552408856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,512,0.005236266553401947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,8192,0.02151573300361633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,256,0.0048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,8192,0.023822933435440063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,128,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,7168,0.01962666710217794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,7168,0.02262826760609945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,6144,0.01746986707051595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,5120,1536,0.00697920024394989
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,6144,0.021636267503102623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,6144,0.013438933094342551
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,65536,0.09003840287526449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,5120,0.014519466956456503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,5120,0.020312533775965372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,16384,0.026871466636657716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,12288,0.02140799959500631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,4096,0.012591999769210816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,10240,0.01908479928970337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,8192,0.016311466693878174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,4096,0.019035732746124266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,4096,0.010714667042096455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,7168,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,3072,0.018011732896169027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,3584,0.011550933122634888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,5120,0.012709333499272665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,3584,0.01858453353246053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,3584,0.010346666971842448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,3072,0.010424533486366272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,2560,0.00848746697107951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,2560,0.01771626671155294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,2048,0.007536000013351441
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,2048,0.015913599729537965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,1536,0.006609066824118296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,1536,0.015599999825159708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,3072,0.009434666236241658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,2560,0.008840533097585042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,1024,0.004443733394145966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,1024,0.015169066190719605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,2048,0.007422933479150136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,512,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,768,0.0038133333126703895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,768,0.014699733257293702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,512,0.014983466267585755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,256,0.003234133372704188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,256,0.014204800128936768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,256,0.00484799991051356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,128,0.0029333333174387617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,128,0.014217600226402283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,64,0.0027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,64,0.014412800470987955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,1536,0.006607999900976817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,4096,32,0.002749866743882497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,1024,0.005679999788602194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,4096,32,0.014126933614412942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,65536,0.11970240275065105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,65536,0.07332906723022461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,16384,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,16384,0.03018239935239156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,12288,0.025950932502746583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,12288,0.026345600684483845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,768,0.005362133185068766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,10240,0.022231467564900718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,10240,0.02449173331260681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,8192,0.018974934021631876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,8192,0.022362667322158813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,7168,0.01730666756629944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,128,0.004600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,7168,0.021895466248194377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,6144,0.01530026694138845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,6144,0.02177066604296366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,65536,0.08957119782765707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,5120,0.013458133737246195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,16384,0.027111466725667315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,5120,0.0199455996354421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,4096,512,0.005130666494369507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,4096,0.012013866504033407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,12288,0.020858667294184365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,4096,0.018849066893259683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,3584,0.010919466614723206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,10240,0.018604799111684164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,3584,0.018521600961685182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,3072,0.009981866677602131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,7168,0.014519466956456503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,3072,0.0184063990910848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,2560,0.008105599880218506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,2560,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,2048,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,2048,0.01565439999103546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,5120,0.012097066640853882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,1536,0.006405333181222279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,4096,0.01071573297182719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,8192,0.016089600324630738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,1536,0.015461333592732749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,1024,0.004307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,3584,0.010151466727256775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,1024,0.015064533551534018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,768,0.0037845333417256674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,768,0.014969600240389505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,512,0.0035573333501815797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,512,0.014756266276041666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,6144,0.01323946714401245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,256,0.0032255999743938447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,3072,0.009301333626111349
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,256,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,2560,0.008622933427492778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,128,0.0029706666866938275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,128,0.014264532923698425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,64,0.0027445333699385325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,64,0.014111999670664468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3584,32,0.002703999976317088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3584,32,0.014215466380119324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,1536,0.006593066453933716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,65536,0.10407360394795735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,65536,0.065338667233785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,768,0.0055167997876803074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,16384,0.030767999092737836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,16384,0.028841600815455122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,12288,0.023460266987482707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,512,0.005297066768010458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,12288,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,10240,0.020536533991495767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,10240,0.023363200823465984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,256,0.004991999765237173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,2048,0.007413333157698314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,8192,0.017249067624409996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,8192,0.022165334224700926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,7168,0.0157642662525177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,1024,0.005938133100668589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,7168,0.021227733294169108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,6144,0.014111999670664468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,6144,0.020473599433898926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,65536,0.08540586630503336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,16384,0.025769599278767902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,6144,0.013117866714795432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,5120,0.012406399846076966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,12288,0.020658133427302043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,5120,0.01945706605911255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,4096,0.01071573297182719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,4096,0.01862506667772929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3584,128,0.004896000027656555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,3584,0.010067199667294819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,10240,0.017938133080800375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,3584,0.018222934007644652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,3072,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,3072,0.017978666226069133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,8192,0.015537066260973611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,2560,0.007740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,2560,0.016058666507403056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,2048,0.006844800213972728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,2048,0.015804800391197204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,5120,0.01181013286113739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,1536,0.005930666625499725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,4096,0.010427733262379963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,1536,0.015195733308792115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,1024,0.004129066566626231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,1024,0.015094400445620219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,7168,0.014341333508491516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,768,0.003940266619126002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,3072,0.00867306689421336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,768,0.014818132917086283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,3584,0.01004266639550527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,512,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,2048,0.006861866513888042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,512,0.01458453337351481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,256,0.003159466634194056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,256,0.014524799585342408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,1536,0.006625066697597504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,128,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,128,0.01423893372217814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,1024,0.005782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,64,0.0026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,64,0.014291200041770934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,3072,32,0.0028586665789286296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,2560,0.008090666433175405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,3072,32,0.0143477330605189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,65536,0.08567679723103841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,65536,0.05799466768900553
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,16384,0.026397866010665894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,16384,0.02611626585324605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,512,0.005191466708978017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,12288,0.020590933163960774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,12288,0.02328853408495585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,128,0.004626133541266123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,10240,0.017771732807159425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,768,0.005527466535568237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,10240,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,8192,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,16384,0.02576533357302348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,3072,256,0.005076266825199127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,12288,0.02037546634674072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,8192,0.02093013326327006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,7168,0.013752532998720804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,7168,0.020146133502324422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,7168,0.014244266351064048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,6144,0.012202666203180949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,6144,0.020125865936279297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,10240,0.017912532885869345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,65536,0.08560746510823568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,5120,0.01095146636168162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,5120,0.019170133272806804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,4096,0.009845333298047383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,4096,0.018145066499710084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,3584,0.009292800227801006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,3584,0.017292799552281697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,3072,0.008598400155703227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,8192,0.015479466319084168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,3072,0.01653333306312561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,2560,0.007463466624418895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,2560,0.01601920028527578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,2048,0.0066453332702318835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,6144,0.012849066654841104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,2048,0.015812266866366068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,2048,0.006965333223342895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,1536,0.005043200155099233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,1536,0.015372799833615622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,1024,0.004145066688458125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,5120,0.01167039970556895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,1024,0.015187199910481772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,768,0.0037077332536379496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,768,0.014839466412862143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,512,0.0035146666069825493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,512,0.014740266402562461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,2560,0.00769706666469574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,256,0.003180799881617228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,256,0.014330666263898215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,1536,0.006482133269309997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,4096,0.010190932949384054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,128,0.002974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,128,0.014194132884343466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,1024,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,64,0.0027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,64,0.01428053379058838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,768,0.0054282665252685545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,3584,0.009778133034706116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2560,32,0.0026464000344276427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2560,32,0.01409386694431305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,3072,0.008678399523099263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,512,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,65536,0.0670250654220581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,65536,0.0494815985361735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,16384,0.02204373280207316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,16384,0.023661865790685018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,12288,0.01744640072186788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,12288,0.02151573300361633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,12288,0.020125865936279297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,256,0.004839466512203216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,10240,0.021050665775934854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,10240,0.015244799852371215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,8192,0.01304639975229899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2560,128,0.004763733347256978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,8192,0.02058346668879191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,7168,0.011993599931399028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,7168,0.013944533467292786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,7168,0.019381332397460937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,6144,0.01111466685930888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,6144,0.018861865997314452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,5120,0.009975467125574749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,5120,0.018662399053573607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,4096,0.008873599767684936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,65536,0.08444480101267496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,4096,0.017460266749064125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,3584,0.008289066453774769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,3584,0.016451199849446617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,3584,0.009142399827639262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,3072,0.007926400005817413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,3072,0.016243199507395424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,2560,0.007152000069618225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,10240,0.01760960022608439
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,2560,0.015863466262817382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,2048,0.005870933334032694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,2048,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,8192,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,1536,0.004716800153255462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,6144,0.012658133109410604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,1536,0.015069866180419922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,1024,0.004016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,5120,0.0114656001329422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,1024,0.014908799529075622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,16384,0.025364265839258833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,768,0.003673599908749262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,4096,0.009732266267140705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,768,0.014802133043607077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,512,0.003373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,512,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,256,0.0030975999931494398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,2560,0.007701333363850911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,256,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,2048,0.0067893331249554946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,128,0.0029535998900731405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,1536,0.006428800026575724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,128,0.01427839994430542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,64,0.0027903998891512555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,64,0.014043733477592468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,2048,32,0.0027317332724730173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,2048,32,0.014133333166440328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,65536,0.053653331597646084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,1024,0.0055744002262751256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,65536,0.04226133426030477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,768,0.005377066632111868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,16384,0.017119999726613364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,16384,0.021951999266942343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,3072,0.00826453318198522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,12288,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,512,0.0051136001944541935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,12288,0.020616533358891805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,10240,0.012167466680208842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,10240,0.019418666760126747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,8192,0.011113599936167399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,256,0.004770133395989736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,8192,0.019079466660817467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,7168,0.010061867038408915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,7168,0.018467199802398682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,7168,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,6144,0.009211732943852743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,6144,0.017905066410700478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,5120,0.008553600311279297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,5120,0.017897599935531618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,65536,0.08442453543345133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,16384,0.024947200218836466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,4096,0.007855999966462452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,4096,0.01730666756629944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,3584,0.007762133578459422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,3584,0.01641813317934672
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,10240,0.01738133430480957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,3072,0.007149866720040639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,8192,0.014964266618092855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,2048,128,0.0046304002404212955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,3072,0.016376533110936484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,2560,0.006555733581384023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,2560,0.016124799847602844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,2560,0.007747200131416321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,2048,0.005345066885153452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,6144,0.01243839959303538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,2048,0.01577600042025248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,1536,0.004700799783070883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,5120,0.010771200060844421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,1536,0.015440000096956888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,1024,0.004069333275159201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,12288,0.019972266753514607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,4096,0.009444266557693481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,1024,0.014940800269444785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,768,0.0037600000699361167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,768,0.01477013329664866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,512,0.0033717334270477297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,3584,0.008961066603660583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,512,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,256,0.002977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,256,0.014493866761525472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,3072,0.008089600006739299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,128,0.002855466554562251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,128,0.01418239971001943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,2048,0.006954666475454967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,64,0.002762666592995326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,1536,0.00633493314186732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1536,32,0.002632533262173335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,64,0.01418560047944387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1536,32,0.014195199807484946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,65536,0.035905067125956217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,768,0.005436799923578898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,65536,0.033428267637888594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,512,0.004985600213209788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,16384,0.013304533561070761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,16384,0.019636267423629762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,256,0.00481386681397756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,12288,0.011368532975514729
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,12288,0.0186954657236735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,128,0.004644266764322917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,10240,0.01039466659228007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,10240,0.01835306684176127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1536,1024,0.005776000022888183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,8192,0.009311999877293904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,8192,0.017348267634709678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,7168,0.008551466464996337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,7168,0.017433599630991618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,65536,0.084662397702535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,6144,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,6144,0.01716053287188212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,5120,0.007606400052706401
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,16384,0.024961066246032716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,5120,0.017753599087397258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,4096,0.006431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,12288,0.019850667317708334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,4096,0.017065600554148356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,10240,0.01738986571629842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,3584,0.006706133484840393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,8192,0.014443733294804893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,3584,0.01676586667696635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,7168,0.012948266665140786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,3072,0.006082133452097575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,6144,0.011678933103879293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,3072,0.016341333587964378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,3072,0.008011733492215473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,2560,0.006035199761390686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,2560,0.01637440025806427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,4096,0.009288533528645834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,2048,0.005366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,2048,0.01554026703039805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,1536,0.004710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,1024,0.004056533426046371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,3584,0.008878933389981587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,1536,0.015331199765205384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,5120,0.010596266388893128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,1536,0.0064746667941411335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,1024,0.015050666530927024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,768,0.0036373332142829893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,768,0.014830933014551798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,2560,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,512,0.0033269333342711128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,512,0.014556800325711569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,256,0.002998399982849757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,256,0.014376533031463624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,2048,0.006853333115577698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,128,0.014241066575050355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,128,0.00276799996693929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,64,0.0026837334036827086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,64,0.014226133624712626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,1024,32,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,1024,0.005721599857012431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,1024,32,0.01416106621424357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,65536,0.030551467339197797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,512,0.005049600203831991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,65536,0.029820799827575684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,65536,0.08451840082804361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,16384,0.010922666390736897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,16384,0.01914560000101725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,256,0.004682666560014089
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,12288,0.009083732962608337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,12288,0.017633066574732462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,128,0.004666666686534882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,10240,0.008861866593360902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,10240,0.01769919991493225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,8192,0.008169599870840708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,8192,0.01737706661224365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,1024,768,0.005448533097902933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,7168,0.007613866527875264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,7168,0.01768959959348043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,6144,0.006955733398596446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,6144,0.017428267002105712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,5120,0.006854400038719177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,5120,0.020057600736618043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,5120,0.01050453285376231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,16384,0.02477333347002665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,4096,0.006198399762312571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,4096,0.018810667594273887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,3584,0.006605866551399231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,12288,0.019678932428359986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,3584,0.017761067549387614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,10240,0.01707520087560018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,3072,0.006131199995676676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,8192,0.014386133352915446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,3072,0.017372800906499227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,3072,0.008051200211048127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,6144,0.011661866307258606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,2560,0.008492799599965413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,2560,0.01644053359826406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,2048,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,2048,0.01602240006128947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,1536,0.006111999849478403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,1536,0.015586133797963461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,1024,0.004638933142026265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,4096,0.009246933460235595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,1024,0.015084800124168397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,768,0.004228266576925913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,3584,0.008782933155695598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,768,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,7168,0.012940800189971924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,512,0.014691199858983359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,2560,0.00754559983809789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,512,0.003740799923737844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,2048,0.006769066552321117
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,256,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,256,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,1536,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,128,0.0029002666473388673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,1024,0.00559146652619044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,128,0.014257066448529563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,32,0.0026101333399613695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,32,0.014075733224550881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,128,0.004750933249791463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,65536,0.024794665972391765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,768,64,0.002681600054105123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,768,0.005328000088532766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,768,64,0.01416106621424357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,65536,0.021334399779637657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,12288,0.007853866616884867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,12288,0.017038933436075845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,16384,0.00902826686700185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,256,0.004817066589991251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,16384,0.018314667542775474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,768,512,0.005166933437188466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,8192,0.006921599805355072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,10240,0.007720533510049183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,10240,0.01797013282775879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,8192,0.017494400342305504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,7168,0.00650133341550827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,7168,0.017511467138926186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,6144,0.005970133344332377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,6144,0.01731626590092977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,16384,0.024207999308904014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,5120,0.006372266511122386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,12288,0.01951573292414347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,5120,0.017781333128611247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,65536,0.08396053314208984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,4096,0.01711146632830302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,10240,0.016378666957219443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,8192,0.01402239998181661
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,4096,0.005967999994754791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,3584,0.0063498665889104204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,6144,0.011659733454386393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,3584,0.01684373418490092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,3072,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,3072,0.016407466928164163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,5120,0.010404266913731893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,2560,0.00614933321873347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,2048,0.005446400245030721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,2560,0.0160970667997996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,2048,0.01572053333123525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,7168,0.012980266412099203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,1536,0.004747733473777771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,1536,0.015219199657440185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,1024,0.004148266712824504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,1024,0.01493333379427592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,768,0.0036992001036802924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,3584,0.008840533097585042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,768,0.014646400014559427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,512,0.0035690667728583017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,3072,0.008050133287906647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,512,0.014571733276049294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,2560,0.007563733557860057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,512,0.004961066444714864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,256,0.003124266614516576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,4096,0.0092330664396286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,2048,0.00680213322242101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,256,0.014418133099873862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,128,0.0028949332733949023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,1024,0.005686399837334951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,128,0.014383999506632486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,768,0.005334400137265523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,64,0.0026880001028378804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,64,0.014198399583498635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,512,32,0.0026890667776266735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,512,32,0.014291200041770934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,65536,0.013727999726931252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,65536,0.02146880030632019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,1536,0.006405333181222279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,16384,0.006622933348019918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,16384,0.017846399545669557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,256,0.004705066482226053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,12288,0.0063274666666984555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,12288,0.01769599914550781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,512,128,0.004549333453178405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,10240,0.0064085334539413456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,10240,0.017449599504470826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,65536,0.08428266843159994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,16384,0.023973333835601806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,8192,0.006187733511130015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,8192,0.017309866348902383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,12288,0.01885226567586263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,7168,0.006049066781997681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,7168,0.017595734198888144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,6144,0.005896533528963724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,6144,0.017197867234547935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,6144,0.011555199821790058
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,5120,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,8192,0.014030933380126953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,5120,0.01800533334414164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,4096,0.005782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,7168,0.012854400277137756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,4096,0.016899200280507405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,3584,0.006218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,10240,0.016523733735084534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,3584,0.01680319905281067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,3072,0.0059797331690788266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,3072,0.01612053314844767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,2560,0.006151466568311056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,2560,0.016155733664830526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,5120,0.010497066378593444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,2048,0.005380266904830932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,2048,0.015522133310635886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,4096,0.009237333138783773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,1536,0.004695466657479604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,1536,0.015198933084805808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,1024,0.003924266745646795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,3584,0.008680533369382222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,1024,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,768,0.0037717332442601522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,3072,0.007965866724650066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,768,0.014731733004252115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,512,0.0034111998975276947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,2560,0.0075647999842961625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,512,0.014571733276049294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,256,0.0029525332152843474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,256,0.014341333508491516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,128,0.0027285332481066385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,128,0.01421119968096415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,1024,0.0055978665749231975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,64,0.0026581334571043652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,64,0.014010666807492574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,256,32,0.002628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,256,32,0.014124799768129984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,65536,0.010675199826558431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,768,0.00528106689453125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,65536,0.01895893414815267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,2048,0.006842666864395141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,65536,0.08389440377553305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,16384,0.00586346685886383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,16384,0.01747200091679891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,16384,0.023870933055877685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,512,0.004884266853332519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,12288,0.005894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,12288,0.017065600554148356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,10240,0.006177066763242086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,1536,0.006470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,256,0.004760533571243286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,10240,0.01730453372001648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,8192,0.005925333499908448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,256,128,0.004488533238569895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,8192,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,8192,0.013991467157999673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,7168,0.005771733323733012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,7168,0.017540266116460167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,12288,0.018781866629918417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,6144,0.005683200061321258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,10240,0.01644373337427775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,6144,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,5120,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,5120,0.01783039967219035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,7168,0.012761599818865457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,4096,0.00565226674079895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,4096,0.0169706662495931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,4096,0.009206400314966837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,3584,0.006095999975999197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,3584,0.016711467504501344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,3072,0.005731200178464254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,3072,0.016134400169054666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,5120,0.010388267040252686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,2560,0.006000000238418579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,2560,0.01609173317750295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,2048,0.005308799942334493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,3584,0.008756267031033833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,2048,0.01551040013631185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,6144,0.011713066697120666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,3072,0.007880533238252004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,1536,0.0046079998215039575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,1536,0.015132799744606018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,1024,0.00410453329483668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,2560,0.007503999769687653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,1024,0.01495039959748586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,768,0.0035466666022936503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,2048,0.006678399940331777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,768,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,512,0.003340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,512,0.01448853313922882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,1024,0.005771733323733012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,256,0.002961066613594691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,768,0.005269333223501841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,1536,0.006431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,256,0.014186666409174601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,128,0.002890666574239731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,128,0.014132266243298849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,64,0.002526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,64,0.014248533050219217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,128,32,0.0026943999032179515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,128,32,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,512,0.004935466746489207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,65536,0.00798933357000351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,256,0.004770133395989736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,65536,0.018346667289733887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,8,128,128,0.004474666714668274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,16384,0.0058773333827654515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,16384,0.01741546591122945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,12288,0.005804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,12288,0.01697493394215902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,10240,0.005915733178456625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,10240,0.017565866311391197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,8192,0.005736533304055532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,8192,0.016777600844701132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,7168,0.005784533421198527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,7168,0.017545600732167564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,6144,0.005683200061321258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,6144,0.017076265811920167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,5120,0.0059562668204307554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,5120,0.017299199104309083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,4096,0.005469866593678792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,4096,0.016889599959055583
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,3584,0.006029866635799408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,3584,0.016364799936612447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,3072,0.005767466624577841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,3072,0.015940266847610473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,2560,0.0055637334783871974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,2560,0.016055466731389363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,2048,0.005022933085759481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,2048,0.01532586713631948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,1536,0.004492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,1536,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,1024,0.003920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,1024,0.015687466661135355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,768,0.0035232000052928926
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,768,0.014640000462532044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,512,0.0032277333239714304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,512,0.014325333635012307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,256,0.00297173336148262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,256,0.014267733693122864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,128,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,128,0.014146133263905843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,64,0.00258240004380544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,64,0.014154666662216186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,64,32,0.002625066787004471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,64,32,0.014043733477592468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,65536,0.008203733464082081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,65536,0.019078399737675986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,16384,0.005858133236567179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,16384,0.01762666702270508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,12288,0.005725866556167603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,12288,0.017197867234547935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,10240,0.005782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,10240,0.01723626653353373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,8192,0.0058677335580190025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,8192,0.016940800348917644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,7168,0.005764266848564148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,7168,0.017406932512919106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,6144,0.005547733108202616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,6144,0.01716266671816508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,5120,0.005838933090368906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,5120,0.017373865842819212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,4096,0.005492266515890757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,4096,0.017442133029301962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,3584,0.00609493354956309
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,3584,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,3072,0.005645866692066193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,3072,0.01646293302377065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,2560,0.005970133344332377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,2560,0.016078933080037435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,1024,0.014697600404421488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,2048,0.005389866729577383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,2048,0.015533866484959922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,1536,0.004796800017356872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,1536,0.015158399939537048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,1024,0.0038890667259693147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,768,0.0034506666163603462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,768,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,512,0.0033290666838486993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,256,0.002825599908828735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,512,0.014411733547846476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,256,0.014230400323867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,128,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,128,0.01406613290309906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,64,0.0024618667860825854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,64,0.013944533467292786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,8,32,32,0.002643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,8,32,32,0.013960533340771995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,16384,0.52249174118042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,16384,0.2745493253072103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,12288,0.3909258524576823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,12288,0.20909120241800944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,10240,0.3270122528076172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,10240,0.18208212852478028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,16384,0.2590496063232422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,8192,0.26267520586649573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,12288,0.19316906929016114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,8192,0.1621237277984619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,7168,0.23042666117350258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,10240,0.18052159945170085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,7168,0.13121919631958007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,8192,0.13157227039337158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,6144,0.19717547098795574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,6144,0.11267306804656982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,7168,0.11478933493296306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,5120,0.16571307182312012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,5120,0.09612586498260497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,6144,0.09927679697672526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,4096,0.13308160305023192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,4096,0.0892906665802002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,3584,0.11597332954406739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,5120,0.08561279773712158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,3584,0.07217600345611572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,4096,0.06951786677042643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,3584,0.06646399895350139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,3072,0.10098986625671387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,3072,0.06993707021077475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,3072,0.053071999549865724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,2560,0.08917333285013834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,2560,0.05677013397216797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,2048,0.06825493176778158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,2560,0.04580906629562378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,2048,0.0482154647509257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,1536,0.05164586702982584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,1536,0.03975573380788167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,2048,0.03966720104217529
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,1024,0.036713600158691406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,1024,0.031515733400980635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,768,0.027729066212972005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,1536,0.03207040031750997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,768,0.02777600089708964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,1024,0.02257279952367147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,512,0.021754666169484457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,512,0.02323413292566935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,768,0.01834133267402649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,256,0.012335999806722005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,512,0.014286933342615762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,256,0.01919040083885193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,256,0.01086186667283376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,128,0.007398400207360585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,65536,0.527462387084961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,128,0.015682133038838704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,65536,128,0.00879039963086446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,64,0.004404266675313314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,64,0.01607253352801005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,65536,32,0.004249600072701773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,65536,32,0.01609386702378591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,65536,0.2690677324930827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,65536,0.2660927931467692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,16384,0.12500373522440594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,16384,0.07923946380615235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,12288,0.0944106658299764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,12288,0.061128532886505126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,10240,0.09081172943115234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,16384,0.06886826356252035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,10240,0.05910400152206421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,12288,0.052818131446838376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,8192,0.06621119976043702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,10240,0.04888533353805542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,8192,0.04629760185877482
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,7168,0.056713600953420006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,8192,0.037375998497009275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,7168,0.04248533248901367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,6144,0.04933013518651326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,7168,0.03319573402404785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,6144,0.039061331748962404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,5120,0.04174186786015828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,5120,0.025224532683690386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,5120,0.035121067365010576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,6144,0.029132799307505293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,4096,0.03427520195643107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,4096,0.030852266152699787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,4096,0.0213536004225413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,3584,0.031115732590357464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,3584,0.02889066735903422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,3584,0.019879466295242308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,3072,0.026366933186848955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,3072,0.026891734202702838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,2560,0.022754132747650146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,2560,0.026131200790405273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,3072,0.017542399962743125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,2048,0.01884160041809082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,2048,0.022722133000691733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,2560,0.01609813372294108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,1536,0.015026133259137472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,2048,0.013142399986584983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,1536,0.02071040074030558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,1536,0.011528533697128297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,1024,0.011001599828402202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,1024,0.018811732530593872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,768,0.0090421328941981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,768,0.01772693395614624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,512,0.0069482664267222095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,1024,0.009147733449935913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,768,0.00817493349313736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,512,0.01542080044746399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,512,0.006279466549555461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,256,0.004072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,256,0.015051733454068503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,256,0.0054293334484100345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,128,0.0034005333979924522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,128,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,64,0.0030400000512599947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,16384,128,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,64,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,16384,32,0.003107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,16384,32,0.014712533354759217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,65536,0.3917663892110189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,65536,0.20221333503723143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,16384,0.09851413567860921
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,16384,0.06327466567357382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,12288,0.07537813186645508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,12288,0.052831999460856115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,16384,0.05605013370513916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,65536,0.20600105921427408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,10240,0.07194453080495199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,10240,0.04687039852142334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,12288,0.047873067855834964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,8192,0.05339839855829874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,8192,0.039683198928833006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,10240,0.03713599840799968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,7168,0.053220268090566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,7168,0.037226665019989016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,8192,0.02993280092875163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,6144,0.04105600118637085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,6144,0.03403626680374146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,5120,0.033831465244293216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,5120,0.031294933954874676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,7168,0.027586134274800618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,5120,0.021206400791803994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,6144,0.02417280077934265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,4096,0.02836373249689738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,4096,0.027832533915837603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,3584,0.024295467138290405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,3584,0.0262442668279012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,4096,0.018065067132314046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,3072,0.022029866774876915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,3584,0.016922666629155477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,3072,0.02444159984588623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,2560,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,2560,0.022588799397150673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,2560,0.013643733660380044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,3072,0.01499626636505127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,2048,0.01583253343900045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,2048,0.02114880084991455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,2048,0.011529599626859028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,1536,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,1536,0.019485867023468016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,1024,0.009551999966303508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,1024,0.017893334229787193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,1536,0.010204799969991048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,768,0.007782400151093801
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,1024,0.008190933366616566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,768,0.016218666235605875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,512,0.006117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,768,0.007191466788450877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,512,0.0150709331035614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,256,0.003673599908749262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,256,0.014909866452217101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,512,0.005718400080998739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,256,0.0052704001466433205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,128,0.0033482665816942847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,65536,0.3351690610249837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,128,0.014680533607800802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,12288,128,0.0046847999095916745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,64,0.003125333289305369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,64,0.014523733655611673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,12288,32,0.003054933249950409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,12288,32,0.014816000064214071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,65536,0.18092373212178547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,16384,0.08967573642730713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,16384,0.057426134745279946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,65536,0.1764085292816162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,12288,0.06699093182881674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,16384,0.04753493467966716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,12288,0.05025279919306437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,12288,0.03796053330103556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,10240,0.06388373374938965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,7168,0.038675200939178464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,10240,0.04070613384246826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,10240,0.031244800488154097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,8192,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,8192,0.03530346552530925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,7168,0.032978133360544844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,6144,0.033419732252756754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,8192,0.02725653251012166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,7168,0.025099732478459674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,6144,0.030011733373006184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,5120,0.028574933608373005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,5120,0.02773546576499939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,6144,0.020804266134897866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,5120,0.019013333320617675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,4096,0.023337600628534953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,4096,0.025050665934880572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,3584,0.020951465765635172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,3584,0.023733333746592204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,4096,0.016613333423932394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,3072,0.017989333470662436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,3584,0.01544533371925354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,3072,0.022425599892934165
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,2560,0.01606613298257192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,2560,0.02104640007019043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,2048,0.013204266627629599
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,2048,0.019553067286809285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,1536,0.010773332913716634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,1536,0.018706132968266807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,1024,0.008210133512814839
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,1024,0.01702186663945516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,768,0.007029333213965099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,3072,0.013566933075586953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,768,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,2048,0.0105621337890625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,2560,0.012526933352152506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,512,0.005692799886067709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,1024,0.007659733295440674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,512,0.015258666872978211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,768,0.0060138667623202005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,1536,0.009562666217486065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,256,0.003538133452335993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,256,0.014733866850535075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,512,0.005542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,128,0.003151999910672506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,128,0.014443733294804893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,256,0.004995200037956238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,64,0.0030826665461063385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,64,0.014398933450380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,10240,32,0.003110400090614955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,10240,32,0.014427733421325684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,10240,128,0.004683733483155568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,65536,0.25706772804260253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,65536,0.14289706548055012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,16384,0.06646613279978433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,16384,0.046380798021952316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,12288,0.051217067241668704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,65536,0.14296639760335286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,12288,0.038583465417226154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,16384,0.04087893168131511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,10240,0.041970133781433105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,10240,0.035035733381907144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,12288,0.034456535180409746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,8192,0.03442773421605428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,8192,0.033624533812204996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,10240,0.027268266677856444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,7168,0.030375466744105024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,7168,0.028961066404978437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,7168,0.020427733659744263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,8192,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,6144,0.026555732885996504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,6144,0.027026132742563887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,5120,0.02286826570828756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,5120,0.025225599606831867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,4096,0.01912533243497213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,4096,0.02272746761639913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,6144,0.018207999070485432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,3584,0.01696746746699015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,3584,0.021910399198532104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,3072,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,3072,0.02066133419672648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,4096,0.01372160017490387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,2560,0.013206400473912559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,5120,0.01625599960486094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,3072,0.011609599987665812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,3584,0.012823466459910074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,2560,0.019850667317708334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,2048,0.011128532886505126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,2048,0.018992000818252565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,1536,0.009145599603652955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,1536,0.017768534024556477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,1536,0.008840533097585042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,1024,0.007086933155854543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,2560,0.011442133784294128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,2048,0.009494400024414063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,1024,0.015473066767056783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,1024,0.006355200211207073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,768,0.006223999957243601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,768,0.015465600291887918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,512,0.004276266694068909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,512,0.015000533064206442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,512,0.0053151999910672505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,256,0.003454933315515518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,256,0.01458453337351481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,768,0.005975466469923655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,256,0.005054933329423269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,128,0.0032768001159032187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,128,0.014681599537531533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,8192,128,0.0047189335028330484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,64,0.002855466554562251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,64,0.014524799585342408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,8192,32,0.0030559999247392017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,8192,32,0.014402133226394654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,65536,0.22752106984456383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,12288,0.04587200085322062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,65536,0.12980053424835206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,65536,0.1261845350265503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,16384,0.06134719848632812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,16384,0.04463680187861125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,16384,0.035692799091339114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,12288,0.037264001369476316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,12288,0.030846933523813885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,10240,0.039827199776967366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,10240,0.03390933275222778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,8192,0.03326080044110616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,8192,0.030070400238037108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,7168,0.029257599512736005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,7168,0.028278400500615437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,6144,0.025748266776402788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,6144,0.02649386723836263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,5120,0.022460800409317017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,5120,0.023912533124287923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,8192,0.021000534296035767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,6144,0.016476800044377647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,10240,0.025243733326594037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,4096,0.019080533583958944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,4096,0.022154666980107627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,7168,0.018807466824849448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,3584,0.017772799730300902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,3584,0.021153066555658975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,5120,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,3072,0.015304533640543619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,3072,0.020348799228668214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,4096,0.012772267063458761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,2560,0.012180266777674358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,2560,0.019236266613006592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,2048,0.01037013332049052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,2048,0.018595200777053834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,1536,0.008614400029182434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,1536,0.01733013391494751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,1024,0.006738133231798808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,3584,0.01211199959119161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,2048,0.008779733379681905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,1024,0.01539306640625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,3072,0.011103999614715577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,2560,0.010492799679438274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,1536,0.008284799754619598
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,768,0.00584853341182073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,768,0.01527466674645742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,768,0.005842133363087972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,512,0.003921066721280416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,512,0.014854400356610616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,1024,0.006022400160630544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,512,0.005656533439954122
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,256,0.003403733422358831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,256,0.01477120021979014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,128,0.0031487998863061273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,128,0.014516266187032065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,128,0.004861866434415182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,64,0.0027893332143624624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,64,0.014587733149528503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,7168,32,0.0028181334336598715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,7168,32,0.014325333635012307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,65536,0.19033279418945312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,65536,0.11018880208333333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,16384,0.05345493157704671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,7168,256,0.005111466844876607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,16384,0.041308800379435226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,12288,0.041316266854604086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,12288,0.03689599831899007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,12288,0.027048534154891966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,10240,0.036235733826955156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,10240,0.030824534098307294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,8192,0.02816213369369507
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,8192,0.02830186684926351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,16384,0.03201919992764791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,7168,0.024537599086761473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,65536,0.11196266810099284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,7168,0.025847466786702473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,6144,0.021219199895858763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,5120,0.018639999628067016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,6144,0.02524159948031108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,8192,0.01881386637687683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,5120,0.022932267189025878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,10240,0.022371200720469157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,4096,0.01634773313999176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,4096,0.021504000822703043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,7168,0.017122133572896322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,6144,0.015107199549674988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,3584,0.014218667149543762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,3584,0.020940800507863365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,5120,0.013617066542307535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,3072,0.012973866860071816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,3072,0.019688532749811808
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,2560,0.01165226697921753
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,2560,0.01898026665051778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,2048,0.009641599655151368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,2048,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,3584,0.011359999577204388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,4096,0.011877333124478657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,2560,0.009590400258700053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,1536,0.008089600006739299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,2048,0.008436266581217449
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,3072,0.010291199882825215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,1536,0.01597653329372406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,1536,0.0075434664885203045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,1024,0.006385066608587901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,1024,0.015565866231918335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,768,0.005150933563709259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,768,0.015038933356602988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,512,0.003818666686614355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,512,0.014739200472831726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,128,0.003083733220895131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,1024,0.006198399762312571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,256,0.003356799980004629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,256,0.014526933431625366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,256,0.004973866542180379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,128,0.014233600099881491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,64,0.0028234665592511495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,64,0.014446933070818582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,6144,32,0.0028661333024501802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,6144,32,0.014245333274205527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,65536,0.16801919937133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,65536,0.09803307056427002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,16384,0.04761813481648763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,12288,0.03545706669489543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,512,0.005231999854246775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,16384,0.036245334148406985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,16384,0.030744532744089764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,12288,0.031121067206064862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,12288,0.023001599311828613
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,768,0.005727999905745188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,10240,0.031242666641871135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,10240,0.027924267450968425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,8192,0.026074665784835815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,8192,0.025269333521525068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,6144,128,0.004739200075467428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,7168,0.02349546750386556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,7168,0.023785599072774253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,6144,0.018222934007644652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,10240,0.020076799392700195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,6144,0.02259946664174398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,6144,0.014232533176740012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,65536,0.1003007968266805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,5120,0.015991466244061787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,5120,0.021252266565958657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,4096,0.013751467068990072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,4096,0.02016106645266215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,3584,0.012185600399971009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,7168,0.015871999661127727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,8192,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,3584,0.019370667139689126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,5120,0.012462932864824932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,3072,0.01083733340104421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,3072,0.01882986625035604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,2560,0.009709866841634114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,2560,0.01827626625696818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,2048,0.008552533388137818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,2048,0.017578667402267455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,4096,0.011416533589363098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,2048,0.008385066191355388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,3584,0.011152000228563944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,1536,0.007213866710662842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,3072,0.009629866480827332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,1536,0.01548693378766378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,2560,0.009129599730173747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,1536,0.006674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,1024,0.0057781333724657696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,1024,0.015304533640543619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,1024,0.006116266548633576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,768,0.004030933231115341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,768,0.0150218665599823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,512,0.00360000009338061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,512,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,256,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,256,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,128,0.0029109333952267963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,128,0.014184533556302389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,64,0.002889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,768,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,64,0.014200533429781595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,256,0.004939733445644379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,5120,32,0.0027615999182065325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,5120,32,0.014178133010864258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,65536,0.12738133271535237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,65536,0.07833066781361898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,16384,0.03556480010350545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,128,0.0046186665693918865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,16384,0.03221546610196431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,5120,512,0.005414400001366933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,16384,0.02904213269551595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,12288,0.02977706591288249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,12288,0.027672533194224042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,12288,0.021180800596872964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,10240,0.025554132461547852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,10240,0.02590720057487488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,65536,0.0905621369679769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,8192,0.02127679983774821
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,8192,0.02360960046450297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,7168,0.019421867529551187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,7168,0.014949333667755128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,7168,0.022689066330591836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,6144,0.016740266482035318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,5120,0.02039146622021993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,6144,0.021337600549062093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,5120,0.01469546655813853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,4096,0.019346133867899577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,4096,0.012641066312789917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,3584,0.011729066570599873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,3584,0.018809600671132406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,3584,0.01036906639734904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,3072,0.01055680016676585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,8192,0.015944533546765647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,3072,0.01836693286895752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,2560,0.008499200145403545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,2560,0.017768534024556477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,6144,0.013341866930325828
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,2048,0.007495466868082683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,2048,0.016429866353670754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,4096,0.010809600353240967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,10240,0.019217065970102944
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,1536,0.006450133522351582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,1536,0.015160533785820007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,1536,0.006842666864395141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,1024,0.004673066735267639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,3072,0.009539199868837993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,5120,0.012249599893887837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,1024,0.015085867047309876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,768,0.003807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,768,0.015202132860819497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,2048,0.007469866673151653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,512,0.0035797332723935447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,512,0.01477120021979014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,256,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,256,0.014342400431632995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,128,0.0028757333755493166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,128,0.014471466342608133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,2560,0.009054932991663616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,64,0.002681600054105123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,64,0.014426666498184203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,4096,32,0.002796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,1024,0.005758933226267497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,4096,32,0.01414293348789215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,768,0.0053962667783101406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,65536,0.11973973115285237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,65536,0.07598079840342203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,512,0.005109333495299021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,128,0.004586666822433472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,16384,0.032686932881673174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,16384,0.02988693316777547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,12288,0.025624533494313557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,12288,0.02601813276608785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,65536,0.08566826979319254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,4096,256,0.004960000018278758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,12288,0.021208532651265464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,10240,0.022010666131973267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,10240,0.02416106661160787
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,8192,0.01874133348464966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,10240,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,8192,0.022875734170277915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,7168,0.017026132345199584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,8192,0.016149333119392394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,7168,0.022564266125361124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,6144,0.01513813336690267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,6144,0.021062399943669638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,5120,0.013402666648228964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,5120,0.019562667608261107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,4096,0.011859200398127238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,16384,0.027334400018056232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,4096,0.018769067525863648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,7168,0.014607999722162882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,6144,0.013035733501116434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,3584,0.010946133732795715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,3584,0.01827733318010966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,3072,0.010103467106819152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,3072,0.018077866236368815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,5120,0.011794132987658183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,2560,0.008155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,2560,0.01727893352508545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,2048,0.0071722666422526045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,2048,0.015812266866366068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,4096,0.01069760024547577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,1536,0.015412267049153647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,1536,0.006234666705131531
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,1024,0.004299733539422353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,3072,0.009181867043177288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,1024,0.015065600474675497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,768,0.0038463999827702843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,768,0.014692266782124838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,768,0.005560533205668131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,512,0.0034858666360378264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,2048,0.0071946665644645694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,1536,0.0065653334061304735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,512,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,3584,0.01042560040950775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,256,0.0032245332996050516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,256,0.014351999759674073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,128,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,128,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,64,0.0027583998938401537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,64,0.014296533664067588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,2560,0.008640000224113464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3584,32,0.0027850667635599775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3584,32,0.014261333147684732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,65536,0.10283626715342205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,1024,0.00581333339214325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,65536,0.06378346681594849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,16384,0.03023146589597066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,512,0.005251200000445048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,16384,0.028755199909210206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,256,0.00491946687301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,12288,0.023602133989334105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3584,128,0.0047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,12288,0.024925865729649863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,10240,0.02107306718826294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,10240,0.02367146611213684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,10240,0.018131200472513834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,8192,0.017590399583180746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,8192,0.023113600413004556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,7168,0.015340800086657206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,7168,0.021192532777786256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,7168,0.014268799622853597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,6144,0.014164266983668008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,6144,0.020278400182723998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,16384,0.02571306626001994
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,5120,0.012520533800125123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,5120,0.01949333349863688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,5120,0.01172693371772766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,65536,0.08555306593577067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,4096,0.010966400305430096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,4096,0.01844586730003357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,8192,0.015459199746449789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,3584,0.010166399677594503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,3584,0.01836586594581604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,3584,0.010105599959691364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,3072,0.00951039989789327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,12288,0.020477867126464842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,3072,0.017633066574732462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,2560,0.007795199751853943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,2560,0.016217600305875143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,2560,0.00810346653064092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,2048,0.006850133339564006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,2048,0.015589333573977151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,2048,0.007084799806276958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,4096,0.010315733154614766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,1536,0.00617386649052302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,6144,0.013076266646385193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,1536,0.015278933445612588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,1024,0.004152533411979675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,768,0.01497066617012024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,1024,0.015122133493423461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,768,0.0038111999630928038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,3072,0.008939733107884724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,512,0.0035264000296592714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,512,0.014924800395965577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,256,0.0031210665901501974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,256,0.014417066176732381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,128,0.002976000060637792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,1024,0.005783466498057047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,128,0.014100266496340432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,768,0.005530666808287303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,64,0.0028170667588710784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,64,0.014167466759681701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,1536,0.006585599978764851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,256,0.004916266600290934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,3072,32,0.0027327999472618104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,3072,32,0.014172800381978354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,65536,0.08515520095825195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,512,0.005252266426881155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,3072,128,0.0046186665693918865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,65536,0.05798186858495077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,16384,0.026245333751042682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,16384,0.025906133651733398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,12288,0.020584533611933388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,10240,0.02180373271306356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,12288,0.023492266734441124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,8192,0.02068480054537455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,12288,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,10240,0.017844265699386595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,10240,0.017882666985193887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,8192,0.015220266580581666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,7168,0.013799466689427695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,7168,0.0199562668800354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,65536,0.084988800684611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,16384,0.02537279923756917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,6144,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,6144,0.019694934288660683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,5120,0.01111466685930888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,5120,0.019013333320617675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,4096,0.009858133395512898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,4096,0.018348799149195353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,3584,0.009242666761080424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,7168,0.01418880025545756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,3584,0.017486933867136636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,6144,0.012916266918182373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,3072,0.008411733309427898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,8192,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,3072,0.01636373301347097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,3072,0.008541867136955261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,2560,0.0073642666141192125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,2560,0.016379732886950174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,5120,0.011709866921106975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,2048,0.006646400193373363
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,2048,0.015752533078193666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,1536,0.0049098665515581764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,4096,0.01032533347606659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,1536,0.015383467078208923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,1536,0.006500266492366791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,1024,0.004133333265781402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,1024,0.015210666259129844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,1024,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,768,0.0038474666575590765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,3584,0.009737599889437358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,768,0.014812800288200378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,2048,0.006913066903750102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,512,0.0035071998834609987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,512,0.014603733023007711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,256,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,256,0.014551466703414917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,128,0.0029738667110602063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,2560,0.008100266754627227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,128,0.014139733711878457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,64,0.0027285332481066385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,768,0.005380266904830932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,64,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,512,0.005127466718355815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,65536,0.06658879915873209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2560,32,0.0027850667635599775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,256,0.004772266745567322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2560,32,0.014233600099881491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,65536,0.047858134905497236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,16384,0.020808533827463786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2560,128,0.004600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,16384,0.02334400018056234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,12288,0.017443199952443443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,12288,0.021280000607172646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,10240,0.015441067020098367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,10240,0.020848000049591066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,8192,0.012818132837613424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,8192,0.019833600521087645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,7168,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,7168,0.019056000312169395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,12288,0.020056533813476562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,6144,0.011327999830245971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,6144,0.0185589333375295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,65536,0.08485546906789145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,16384,0.02513279914855957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,6144,0.012679466605186462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,8192,0.015177599589029946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,5120,0.010030933221181234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,7168,0.01397333343823751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,5120,0.019799466927846274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,5120,0.011457066734631855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,10240,0.017511467138926186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,4096,0.00902826686700185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,4096,0.018170666694641114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,3584,0.008198399841785432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,3584,0.018180267016092936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,3072,0.007863466441631318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,3072,0.016265599926312765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,3072,0.008113066852092742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,2560,0.0068351998925209045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,2560,0.016133333245913185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,4096,0.009799466530481974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,2048,0.005950933198134104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,2048,0.01562773287296295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,1536,0.004808533191680908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,1536,0.015270400047302245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,1024,0.015102932850519816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,1536,0.006488533318042755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,3584,0.009355733791987102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,1024,0.004127999891837438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,768,0.0037098666032155357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,768,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,512,0.0033333333830038703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,2560,0.0076000000039736434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,512,0.014759467045466105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,256,0.0030730667213598887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,256,0.014430933197339377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,1024,0.005635199944178263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,128,0.002906666696071625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,128,0.014181333780288696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,768,0.005288533369700114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,64,0.0026015999416510267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,2048,0.006821333368619282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,512,0.0049216002225875854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,256,0.004849066833655039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,64,0.014268799622853597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,2048,32,0.0026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,2048,32,0.014062933127085366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,65536,0.053540265560150145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,65536,0.04046826759974162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,16384,0.016976000865300496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,16384,0.02249600092569987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,2048,128,0.004638933142026265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,12288,0.014359466234842935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,12288,0.020054399967193604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,10240,0.012522666652997335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,10240,0.019501866896947224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,8192,0.01104213297367096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,8192,0.019082667430241902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,7168,0.010147200028101603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,65536,0.08386452992757162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,7168,0.018652800718943277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,16384,0.024859732389450072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,6144,0.00921493371327718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,6144,0.017854932943979898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,5120,0.01960853338241577
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,5120,0.008574933807055155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,8192,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,4096,0.007786666850248973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,4096,0.019078399737675986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,3584,0.0077237332860628765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,3584,0.018473599354426065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,3584,0.00895360012849172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,3072,0.007011199990908305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,12288,0.019901865720748903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,3072,0.018217599391937254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,6144,0.011987200379371643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,2560,0.008337066570917765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,10240,0.017542399962743125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,2560,0.01736533244450887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,2048,0.007275733351707459
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,5120,0.010774399836858113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,2048,0.016874667008717856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,4096,0.009347200393676758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,1536,0.006701866785685222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,7168,0.013854933778444925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,1536,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,1536,0.006389333307743073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,1024,0.005201066533724466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,3072,0.008041599889596303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,1024,0.015511467059453329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,2560,0.00765119989713033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,768,0.004611200094223023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,768,0.015135999520619711
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,2048,0.006863999863465626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,512,0.0038090666135152185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,512,0.014945066968599954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,256,0.0032255999743938447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,256,0.01452906628449758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,128,0.002919466545184453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,128,0.014297599593798319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,128,0.004514133433500925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,64,0.00288426677385966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,64,0.014268799622853597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,768,0.0052714665730794275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1536,32,0.0028938665986061097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1536,32,0.01434879998366038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,65536,0.036133333047231035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,512,0.004969599843025208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,1024,0.005787733197212219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1536,256,0.004669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,65536,0.03229973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,16384,0.01332373321056366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,16384,0.020116267601648967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,12288,0.011244799693425496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,12288,0.018403200308481853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,12288,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,10240,0.010227200388908387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,10240,0.018484266599019368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,8192,0.009063466389973959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,8192,0.017378133535385133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,8192,0.014470400412877402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,7168,0.008422399560610454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,7168,0.017526400089263917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,65536,0.08419946829477945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,6144,0.007712000111738841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,6144,0.017117865880330405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,6144,0.011694932977358501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,5120,0.007719466586907704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,5120,0.021859200795491536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,10240,0.017191465695699057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,4096,0.006537599861621857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,4096,0.019742933909098308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,4096,0.009339732925097148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,3584,0.006678399940331777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,3584,0.021436800559361778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,3072,0.006137600044409434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,16384,0.024708267052968344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,7168,0.013089066743850708
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,3072,0.021975467602411904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,2560,0.009788800279299419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,2560,0.020068265994389854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,2560,0.007715199887752533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,2048,0.008412800232569377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,3584,0.00885653297106425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,2048,0.016953599452972413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,1536,0.010479999581972758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,1536,0.016427733500798545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,1536,0.006387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,1024,0.007912533481915791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,3072,0.008062933385372163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,5120,0.01053653359413147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,1024,0.016777600844701132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,768,0.0065194666385650635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,768,0.015964800119400026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,512,0.00513919989267985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,256,0.014935466647148132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,512,0.015533866484959922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,2048,0.006759466727574666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,256,0.00386559988061587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,128,0.0032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,128,0.01471466620763143
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,64,0.0030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,64,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,1024,32,0.0031648000081380212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,768,0.005369600156943003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,1024,32,0.01447466711203257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,65536,0.029818665981292725
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,65536,0.0288266658782959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,1024,0.005657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,128,0.004620799918969473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,256,0.004714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,16384,0.011087999741236369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,16384,0.020102399587631225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,12288,0.009335466225941976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,12288,0.017554134130477905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,10240,0.008726400136947633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,1024,512,0.005061333378156027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,10240,0.017498666048049928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,8192,0.008072533210118612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,8192,0.01713599960009257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,65536,0.08416000207265219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,7168,0.007593599955240886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,7168,0.017594667275746663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,12288,0.01928000052769979
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,6144,0.00680213322242101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,6144,0.017164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,5120,0.006793599824110668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,5120,0.019642666975657145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,16384,0.02480319937070211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,4096,0.006218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,7168,0.013078400492668152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,10240,0.016940800348917644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,6144,0.01176533301671346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,4096,0.018310399850209554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,3584,0.0064085334539413456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,3584,0.018153599898020425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,5120,0.010433066884676616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,2560,0.00783253312110901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,3584,0.00883840024471283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,3072,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,8192,0.014350933829943338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,3072,0.017749333381652833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,2560,0.017027199268341064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,2048,0.00695360004901886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,2048,0.016450132926305136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,1536,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,3072,0.008044800162315369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,1536,0.006318933268388112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,1024,0.004557866851488749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,1024,0.015382400155067444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,2560,0.007682133217652638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,4096,0.009266133109728496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,768,0.0043594668308893835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,768,0.015077333648999533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,512,0.0037685332198937735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,512,0.014983466267585755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,256,0.0031829332311948144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,256,0.014284800489743552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,128,0.0029728000362714132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,1024,0.005559466779232025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,128,0.014258133371671042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,64,0.0027637332677841187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,2048,0.0068458666404088335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,768,32,0.002885333448648453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,64,0.014222932855288186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,768,32,0.014377599954605103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,65536,0.02121280034383138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,768,0.005252266426881155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,65536,0.024086399873097738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,16384,0.009142399827639262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,1536,0.0063967997829119366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,16384,0.017781333128611247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,12288,0.007916800181070964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,512,0.0050911997755368555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,12288,0.017439999183019004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,10240,0.007819733520348867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,10240,0.01741546591122945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,10240,0.016720000902811685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,8192,0.006871466835339864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,128,0.004574933151404063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,8192,0.01727466583251953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,7168,0.006495999793211619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,7168,0.017638399203618368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,16384,0.02431360085805257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,6144,0.006045866509278615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,6144,0.017595734198888144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,768,256,0.0048426667849222815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,5120,0.0062730665008227035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,12288,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,5120,0.018017067511876424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,4096,0.005861333509286245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,4096,0.017244799931844076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,3584,0.006485333542029063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,65536,0.08387520313262939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,8192,0.01402453382809957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,3584,0.017058134078979492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,3072,0.006005333364009857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,3072,0.016476800044377647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,2560,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,2560,0.016321067015329996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,7168,0.012802132964134216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,2560,0.007648000121116638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,2048,0.00547733356555303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,6144,0.011706667145093282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,2048,0.015737600127855935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,2048,0.0066997334361076355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,1536,0.004785066843032837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,5120,0.01036906639734904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,1536,0.015461333592732749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,1536,0.006361599763234456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,1024,0.004089600096146265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,1024,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,768,0.003955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,768,0.014748799800872802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,4096,0.009236266215642292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,512,0.0033770665526390077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,512,0.014657066265741984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,3072,0.008032000064849854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,256,0.003127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,256,0.014083199699719749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,128,0.0029098667204380036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,128,0.014085333546002707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,64,0.0026549334327379864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,64,0.01411733329296112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,512,32,0.0028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,512,32,0.01409173309803009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,3584,0.008771199981371562
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,768,0.005323733389377594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,65536,0.013751467068990072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,65536,0.02113599975903829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,16384,0.006740266581376393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,512,0.004981333514054617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,12288,0.0060597335298856105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,16384,0.017964800198872886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,12288,0.01709866722424825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,128,0.004519466559092204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,1024,0.005660800139109293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,10240,0.0063360000650088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,10240,0.01726079980532328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,65536,0.08351893424987793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,8192,0.006155733267466227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,8192,0.0170741339524587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,512,256,0.004745600124200186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,7168,0.005907199780146281
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,7168,0.01755519906679789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,16384,0.0236629327138265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,6144,0.005973333120346069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,6144,0.017164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,6144,0.01156160036722819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,12288,0.018821332852045694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,5120,0.006371200084686279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,5120,0.018167465925216675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,10240,0.01637226641178131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,5120,0.01033066709836324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,4096,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,4096,0.017036799589792886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,3584,0.00631573349237442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,3584,0.016541866461435954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,3584,0.008667733271916707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,8192,0.01399679978688558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,3072,0.006049066781997681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,3072,0.016105600198109946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,2560,0.006022400160630544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,7168,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,2560,0.015890133380889893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,4096,0.009035733342170716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,2048,0.005492266515890757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,2048,0.015592533349990844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,1536,0.004714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,1536,0.0151146670182546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,1536,0.006332799792289734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,2560,0.007464533547560374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,1024,0.004026666780312856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,1024,0.01493013302485148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,3072,0.008035199840863545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,768,0.003647999962170919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,768,0.014514133334159851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,2048,0.00674773355325063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,512,0.0033471999069054925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,512,0.014665599664052328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,512,0.0050357331832249965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,256,0.0029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,256,0.01416000028451284
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,128,0.002812800059715907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,128,0.014267733693122864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,64,0.002614400039116542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,64,0.01421440045038859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,1024,0.0055744002262751256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,256,32,0.0027306665976842242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,65536,0.018857600291570027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,256,32,0.013948800166447959
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,65536,0.010682666301727295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,16384,0.005846400062243144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,768,0.0052704001466433205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,16384,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,12288,0.005830400188763936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,12288,0.017115734020868936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,10240,0.006065066655476888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,128,0.004496000210444133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,10240,0.017539199193318686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,10240,0.01630826691786448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,8192,0.005925333499908448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,8192,0.01650879979133606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,256,256,0.004671999812126159
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,7168,0.005914666752020518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,16384,0.023679999510447185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,7168,0.017123200496037803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,6144,0.005703466633955637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,6144,0.016993065675099693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,12288,0.01885120073954264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,5120,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,65536,0.08298559983571371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,5120,0.0172437330087026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,4096,0.005534933507442474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,4096,0.016816000143686928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,8192,0.014043733477592468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,3584,0.0059914668401082356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,7168,0.012728533148765564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,3584,0.016314666469891867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,3072,0.005915733178456625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,3072,0.01627626617749532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,6144,0.011667199929555257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,2560,0.005726933479309082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,5120,0.010327466328938802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,2560,0.015821866194407144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,2560,0.007561600208282471
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,2048,0.00513919989267985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,2048,0.015260799725850423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,1536,0.004444799820582072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,3584,0.008706133564313252
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,1536,0.01511360009511312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,1536,0.006227200229962667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,1024,0.0038293334345022834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,1024,0.015050666530927024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,4096,0.009198932846387228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,768,0.00365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,2048,0.006648533542950948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,512,0.014556800325711569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,3072,0.0079434668024381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,768,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,512,0.0033333333830038703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,512,0.0048981333772341405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,256,0.003155199935038885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,256,0.014230400323867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,128,0.002755200117826462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,128,0.014155733585357665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,64,0.00264533335963885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,64,0.01430506706237793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,128,32,0.0025770666698614756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,1024,0.005510400235652924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,128,32,0.014190933108329773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,768,0.005120000243186951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,65536,0.007973333199818928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,65536,0.017939200003941856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,16384,0.005636266867319743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,16384,0.017063466707865398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,12288,0.005723733206590017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,12288,0.01702079971631368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,8192,0.016432000199953715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,10240,0.005921066800753275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,10240,0.017343999942143758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,8192,0.005774933099746704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,256,0.004709333181381226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,7168,0.005718400080998739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,7168,0.016952532529830932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,4096,0.005569066603978475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,6144,0.005529599885145823
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,6144,0.01717653274536133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,5120,0.006011733412742614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,4,128,128,0.004502399762471517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,5120,0.017318399747212727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,4096,0.016548267006874083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,3584,0.005874133110046387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,3584,0.016270933548609416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,3072,0.005737600227197012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,3072,0.016058666507403056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,2560,0.005749333401521047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,2560,0.01571626663208008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,768,0.003585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,2048,0.005083733300367991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,2048,0.015356799960136414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,1536,0.004437333345413208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,1536,0.0151637335618337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,1024,0.0038015998899936674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,1024,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,768,0.014644267161687216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,512,0.003269333392381668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,512,0.014759467045466105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,256,0.0030464000999927522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,256,0.014263466993967692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,128,0.0028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,128,0.014244266351064048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,64,0.0026677332818508146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,64,0.014193066954612732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,64,32,0.002571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,64,32,0.01409066617488861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,65536,0.007870933413505555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,65536,0.01835626761118571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,16384,0.005804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,16384,0.017015467087427773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,12288,0.0056757330894470215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,12288,0.017117865880330405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,10240,0.005709866682688395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,10240,0.017374932765960693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,8192,0.005795200169086456
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,8192,0.016908800601959227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,7168,0.005637333293755849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,7168,0.017180800437927246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,6144,0.005580799778302511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,6144,0.016780799627304076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,5120,0.005846400062243144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,5120,0.01729493339856466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,3072,0.01597653329372406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,4096,0.005498666564623515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,2560,0.015820800264676412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,4096,0.016694400707880655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,2048,0.015426133076349893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,3584,0.0059797331690788266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,3584,0.01635840038458506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,3072,0.005676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,2560,0.005678933362166087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,2048,0.004994133114814758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,1536,0.004454400142033895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,1536,0.014987732966740927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,1024,0.003824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,1024,0.014776532848676046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,768,0.0036501333117485045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,768,0.014500266313552857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,512,0.003202133377393087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,512,0.014457600315411887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,256,0.002921599894762039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,256,0.014339199662208557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,128,0.0027072000006834666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,128,0.014219733079274497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,64,0.0027402666707833606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,64,0.014195199807484946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,4,32,32,0.0025397333006064097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,4,32,32,0.014087466398874917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,16384,0.27464211781819664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,16384,0.5219818751017253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,12288,0.20878613789876305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,12288,0.39088853200276696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,12288,0.19349013964335124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,10240,0.32642027537027996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,10240,0.18219839731852214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,10240,0.16230506896972657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,8192,0.27551466623942056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,8192,0.1447231928507487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,8192,0.14299519856770831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,7168,0.23972160021464028
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,7168,0.13022293249766032
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,6144,0.19715946515401203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,6144,0.126909867922465
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,16384,0.2597194671630859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,5120,0.16502505938212078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,5120,0.10295466581980388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,4096,0.07986346880594888
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,4096,0.14615893363952637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,3584,0.11569600105285645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,3584,0.07124479611714682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,3584,0.06562666495641073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,3072,0.10115946928660076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,7168,0.11687359809875489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,3072,0.06360853513081868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,2560,0.08362560272216797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,2560,0.05528639952341715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,2048,0.06786026954650878
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,2048,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,2048,0.03693866729736328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,1536,0.05179200172424316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,1536,0.03935786485671997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,5120,0.08393173217773438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,1024,0.036194133758544925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,4096,0.0747050682703654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,1024,0.03150186737378438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,1024,0.023016534248987832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,768,0.027564799785614012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,3072,0.0526367982228597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,6144,0.10053333441416423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,768,0.02755519946416219
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,2560,0.047838934262593585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,512,0.019525333245595296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,512,0.02280319929122925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,256,0.011219200491905213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,1536,0.02928000092506409
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,256,0.01928960084915161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,128,0.007451733450094859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,128,0.015639467040697734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,64,0.004665599763393402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,64,0.015821866194407144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,65536,32,0.004375466704368591
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,768,0.01834453344345093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,65536,32,0.016084266702334087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,16384,0.124946133295695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,65536,0.2672064145406087
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,16384,0.07884906927744548
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,65536,0.5270880063374836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,12288,0.09959572950998942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,12288,0.06244800090789795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,256,0.010930132865905762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,10240,0.07916693687438965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,12288,0.05821119944254557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,10240,0.05529706478118897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,10240,0.04496106704076131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,8192,0.06403306722640992
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,128,0.008449066678682964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,8192,0.04585386514663696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,8192,0.038121600945790604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,7168,0.05644906759262085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,7168,0.04212373495101929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,6144,0.049054932594299314
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,65536,512,0.014637866616249084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,5120,0.04147306680679321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,6144,0.03919999996821086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,5120,0.03451626698176066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,4096,0.034246400992075605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,65536,0.2584949334462484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,16384,0.0694762627283732
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,4096,0.031128533681233722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,3584,0.02995413343111674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,3584,0.030318933725357055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,3584,0.01961173415184021
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,7168,0.03287146687507629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,6144,0.02918506662050883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,3072,0.027078400055567425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,3072,0.026855466763178508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,2560,0.02257919907569885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,2560,0.024644267559051514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,2048,0.019099734226862588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,2048,0.022801067431767783
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,1536,0.01495146652062734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,1536,0.02069759964942932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,5120,0.0255786657333374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,1024,0.01095146636168162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,3072,0.01744640072186788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,1024,0.01898026665051778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,4096,0.021242666244506835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,768,0.009016533692677815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,2560,0.015474133690198264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,768,0.01768853267033895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,512,0.007073066631952922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,512,0.015250133474667868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,512,0.006218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,256,0.004054400076468786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,2048,0.013114666938781739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,256,0.014944000045458474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,128,0.0033471999069054925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,1536,0.011256532867749532
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,128,0.014708266655604044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,64,0.014535466829935709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,64,0.003180799881617228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,16384,32,0.0030901332696278887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,16384,32,0.01477013329664866
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,16384,0.09780480066935221
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,65536,0.20072959264119467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,65536,0.3906485239664713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,65536,0.20640533765157065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,768,0.008198399841785432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,16384,0.06277653376261393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,12288,0.05845439831415812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,256,0.00537066658337911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,12288,0.085044264793396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,12288,0.042701868216196696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,128,0.004774400095144907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,10240,0.06368746757507324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,10240,0.04665600061416626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,10240,0.037367467085520426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,8192,0.054924801985422766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,16384,1024,0.00914026697476705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,8192,0.040446933110555014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,7168,0.04861439863840739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,7168,0.03744106690088908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,6144,0.040863998730977374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,6144,0.03391786813735962
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,6144,0.0240831991036733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,5120,0.035537068049112955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,5120,0.030770132939020794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,8192,0.031141332785288495
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,4096,0.028380799293518066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,4096,0.027906133731206255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,7168,0.02796906630198161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,3584,0.024895999828974405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,16384,0.05852799812952677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,3584,0.026178133487701417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,3584,0.016669867436091106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,3072,0.021566933393478392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,3072,0.024500266710917155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,2560,0.018902399142583213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,2560,0.022644267479578654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,2048,0.01575040022532145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,2048,0.02086506684621175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,5120,0.02135253349939982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,1536,0.012583466370900473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,1536,0.019578667481740315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,1024,0.009495466947555542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,1024,0.017922133207321167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,1024,0.008193066716194153
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,4096,0.018001067638397216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,768,0.007815466821193695
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,768,0.016577066977818807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,2560,0.01346986691157023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,768,0.006908800204594929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,512,0.006286933521429698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,512,0.015154133240381876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,512,0.005729066828886667
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,2048,0.011452800035476685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,256,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,256,0.014881066481272378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,256,0.005206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,128,0.0032885332902272543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,128,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,3072,0.015820800264676412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,128,0.004711466530958811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,64,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,12288,1536,0.010012800494829815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,64,0.014494933684666953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,12288,32,0.003019733230272929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,12288,32,0.014620799819628397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,65536,0.3321791966756185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,16384,0.08726399739583333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,65536,0.1783093293507894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,16384,0.0567957321802775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,12288,0.0679189364115397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,12288,0.05008426507314047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,10240,0.05640746752421061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,10240,0.043442134062449136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,8192,0.046621867020924884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,8192,0.035230934619903564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,65536,0.16766187349955242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,7168,0.03882453441619873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,7168,0.03264960050582886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,7168,0.025298132499059038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,6144,0.035869868596394856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,6144,0.030294400453567506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,5120,0.02858453392982483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,5120,0.027299199501673383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,5120,0.019118932882944743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,4096,0.023540266354878745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,4096,0.024642133712768556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,12288,0.03847786585489909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,3584,0.020856533447901407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,10240,0.03147520025571187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,3584,0.023389865954717003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,16384,0.048981332778930665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,8192,0.027135999997456868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,3072,0.01840426723162333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,3072,0.022299732764561972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,2560,0.01592639982700348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,2560,0.02104640007019043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,2048,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,3584,0.01511360009511312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,2048,0.01983039975166321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,2048,0.010585600137710571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,1536,0.010663466652234395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,1536,0.018320000171661376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,6144,0.021283199389775596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,3072,0.01290986637274424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,4096,0.016320000092188515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,1024,0.008076799909273784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,1024,0.01732800006866455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,768,0.006980266670385997
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,768,0.01502826710542043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,2560,0.012351999680201214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,512,0.005633066594600678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,512,0.015100799997647605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,256,0.0034783999125162757
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,256,0.01469013293584188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,128,0.0031818665564060213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,128,0.014203733205795288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,64,0.002889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,1024,0.007761066655317943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,64,0.014334932963053385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,10240,32,0.0029077333708604175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,10240,32,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,1536,0.009324799974759419
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,65536,0.26439785957336426
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,65536,0.14249919255574545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,16384,0.06621119976043702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,16384,0.05120533307393392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,256,0.0050794666012128195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,16384,0.040752001603444415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,12288,0.0524895985921224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,12288,0.03824853499730428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,128,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,512,0.005485866467158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,10240,768,0.0062047998110453285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,10240,0.042021334171295166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,10240,0.03460479974746704
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,10240,0.027381332715352376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,8192,0.03427199920018514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,8192,0.03153066635131836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,8192,0.022230400641759237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,7168,0.030180267492930096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,7168,0.028845866521199543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,65536,0.1422719955444336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,7168,0.020195200045903524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,6144,0.026675200462341307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,6144,0.026894932985305785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,6144,0.018068265914916993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,5120,0.022842667500178018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,12288,0.031930667161941526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,5120,0.026314665873845417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,4096,0.01946773330370585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,4096,0.022655999660491942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,4096,0.013658666610717773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,3584,0.016834133863449098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,3584,0.02157013416290283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,3072,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,5120,0.016302933295567833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,3072,0.020827732483545938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,2560,0.013235200444857279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,2560,0.01991466681162516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,2048,0.010973866780598958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,2048,0.01885653336842855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,2048,0.009590400258700053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,1024,0.007084799806276958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,1536,0.009178666273752849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,1536,0.01785279909769694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,3584,0.012806399663289388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,1024,0.015898666779200234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,768,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,2560,0.010630399982134501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,768,0.01555519998073578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,512,0.004554666578769684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,3072,0.011596799890200297
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,512,0.015075199802716575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,256,0.0035616000493367515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,1536,0.008578133583068848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,1024,0.006623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,256,0.014633599917093912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,128,0.003127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,128,0.014587733149528503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,768,0.005894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,64,0.0029365333418051405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,65536,0.2273087978363037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,65536,0.128439466158549
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,512,0.005385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,64,0.014331733187039694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,8192,32,0.0028437333802382152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,8192,32,0.014516266187032065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,128,0.004569600025812784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,16384,0.06048426628112793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,16384,0.04432213306427002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,16384,0.0359775980313619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,12288,0.045765332380930585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,8192,256,0.005032533407211303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,12288,0.03710506757100423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,10240,0.041767466068267825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,10240,0.03328319986661275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,65536,0.12533120314280194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,8192,0.033617067337036136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,8192,0.030049065748850506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,12288,0.0279914657274882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,8192,0.020584533611933388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,7168,0.02881386677424113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,7168,0.027899734179178876
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,7168,0.019629865884780884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,6144,0.025458133220672606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,6144,0.026361600557963057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,5120,0.0221781333287557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,5120,0.02377386689186096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,4096,0.01877760092417399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,10240,0.02449493408203125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,4096,0.021915733814239502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,4096,0.012758400042851767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,3584,0.016976000865300496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,6144,0.01667840083440145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,3584,0.021179733673731486
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,3584,0.01188800036907196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,5120,0.01492800017197927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,3072,0.015372799833615622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,3072,0.020247467358907065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,2560,0.011960533261299134
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,2560,0.019307732582092285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,2048,0.010342400272687275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,2048,0.018263467152913413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,1536,0.008628267049789428
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,1536,0.01762239933013916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,1024,0.005864533285299936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,1024,0.006793599824110668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,1024,0.014995200435320535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,768,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,3072,0.010937600334485372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,768,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,1536,0.00809386670589447
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,512,0.004121600091457367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,2560,0.010133333007494609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,512,0.01492693324883779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,256,0.0034293333689371743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,2048,0.008983467022577922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,256,0.014477866888046264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,256,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,128,0.0032106667757034303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,128,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,64,0.0031498665610949195
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,768,0.005860266586144766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,64,0.014267733693122864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,7168,32,0.0029109333952267963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,7168,32,0.01455573340257009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,65536,0.18964266777038574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,65536,0.10961066881815593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,65536,0.11143360137939454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,16384,0.05151040156682333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,16384,0.03899626731872559
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,128,0.004667733112970988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,12288,0.04595520099004109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,12288,0.0353877345720927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,10240,0.03595306475957234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,7168,512,0.005470933516820272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,10240,0.032017066081364946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,8192,0.028753066062927247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,8192,0.02782613237698873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,7168,0.026444800694783527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,7168,0.026662399371465046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,7168,0.016935465733210246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,6144,0.021636267503102623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,6144,0.025364265839258833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,6144,0.015130666891733804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,5120,0.019357866048812865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,12288,0.025250132878621417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,5120,0.023510400454203287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,10240,0.022087466716766358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,8192,0.019030400117238364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,16384,0.03237013419469197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,4096,0.015749333302179973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,4096,0.02150933345158895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,3584,0.014115200440088908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,3584,0.020795732736587524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,3072,0.013424000144004822
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,3072,0.019588265816370645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,3072,0.010125866532325745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,4096,0.011804800232251484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,2560,0.011201066772143047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,2560,0.018873600165049235
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,5120,0.01378986636797587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,2048,0.00956053336461385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,2048,0.018205867211023966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,2048,0.008468266328175862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,1536,0.008180266618728638
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,3584,0.01136959989865621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,1536,0.01679253379503886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,1024,0.006346666812896728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,1024,0.015459199746449789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,768,0.005371733506520589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,768,0.014937600493431092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,512,0.00395413339138031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,512,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,256,0.003272533416748047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,256,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,2560,0.009460266431172688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,1024,0.005894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,128,0.0029685333371162414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,128,0.014409599701563516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,64,0.0027583998938401537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,768,0.0056202664971351625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,64,0.014328533411026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,6144,32,0.0028064000109831494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,6144,32,0.014284800489743552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,1536,0.0077237332860628765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,65536,0.16342933972676593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,65536,0.09680000146230062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,16384,0.04744960069656372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,256,0.00496319979429245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,16384,0.036507733662923175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,16384,0.030826665957768756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,12288,0.036356266339619955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,12288,0.030778666337331135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,12288,0.0233407994111379
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,128,0.00454720010360082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,10240,0.031070933739344282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,10240,0.027986133098602296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,6144,512,0.005274666845798493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,10240,0.020456532637278237
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,8192,0.025783467292785644
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,65536,0.0942250649134318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,8192,0.02547520001729329
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,7168,0.023471999168395995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,7168,0.023944532871246337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,7168,0.015838932991027833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,6144,0.0181877334912618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,6144,0.02239146629969279
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,8192,0.017280000448226928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,5120,0.01579200029373169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,5120,0.021617066860198975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,5120,0.012878933548927307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,4096,0.013409066200256347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,4096,0.020032000541687012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,4096,0.011355732878049214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,3584,0.01206933359305064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,6144,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,3584,0.01973759929339091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,3072,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,3072,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,3072,0.009809066851933796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,2560,0.00962453285853068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,2560,0.018229333559672038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,2560,0.00913279950618744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,2048,0.008501332998275758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,2048,0.01733120083808899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,1536,0.007228800157705943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,1536,0.01556373337904612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,1024,0.0057536001006762184
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,1024,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,1024,0.006043733159701029
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,768,0.004194133480389913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,768,0.014849066734313965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,2048,0.0079925333460172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,3584,0.010854400197664897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,512,0.0038122666378815973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,512,0.014661332964897156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,512,0.0053941334287325535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,256,0.0032672000428040824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,256,0.01428053379058838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,1536,0.007011199990908305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,128,0.0030602666238943735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,128,0.014173866311709086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,128,0.004704000055789947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,64,0.0028181334336598715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,64,0.014330666263898215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,5120,32,0.002734933296839396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,5120,32,0.014567466576894126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,768,0.005400533477465311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,65536,0.12682987054189046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,65536,0.07787626584370931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,12288,0.02969599962234497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,65536,0.09022933642069499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,16384,0.035816534360249834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,16384,0.03172053297360738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,12288,0.028853332996368407
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,10240,0.025579732656478883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,10240,0.02531519929567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,8192,0.021091200908025107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,8192,0.023591466744740806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,8192,0.015779200196266174
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,5120,256,0.005003733436266581
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,7168,0.018899200359980266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,7168,0.022460800409317017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,6144,0.016505600015322367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,6144,0.021229867140452066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,5120,0.014563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,5120,0.020066134134928384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,10240,0.01837973395983378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,4096,0.012664533654848733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,4096,0.019246933857599895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,3584,0.011524266997973124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,3584,0.018627200524012247
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,7168,0.015246933698654175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,3072,0.010501333077748616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,16384,0.02682773272196452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,6144,0.013522133231163025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,12288,0.02187626759211222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,3072,0.01795519987742106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,2560,0.0086218665043513
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,2560,0.017678932348887125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,2560,0.00899733304977417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,2048,0.007518933216730754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,2048,0.016135467092196147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,2048,0.007614933451016744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,1536,0.006593066453933716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,4096,0.010617599884668986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,1536,0.01495253344376882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,3584,0.010196266571680705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,1536,0.006669866542021434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,1024,0.004492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,1024,0.015358933806419372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,3072,0.009193600217501322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,1024,0.005553066730499268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,5120,0.012270933389663697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,768,0.003909333298603693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,768,0.014837333559989929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,512,0.003565866748491923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,512,0.01478506624698639
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,256,0.003151999910672506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,256,0.014478933811187745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,256,0.004783999919891357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,128,0.0028959999481836954
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,128,0.01421119968096415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,64,0.0028351999819278715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,512,0.004994133114814758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,768,0.005465599894523621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,64,0.01441493332386017
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,4096,32,0.0027776000400384264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,4096,32,0.014124799768129984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,65536,0.11795307000478109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,65536,0.07413547039031983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,16384,0.03212053378423055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,16384,0.02994133234024048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,12288,0.027065600951512652
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,12288,0.02640426754951477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,10240,0.021995733181635536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,10240,0.023923200368881226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,8192,0.018473599354426065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,8192,0.02227413256963094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,8192,0.016177067160606386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,7168,0.017026132345199584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,4096,128,0.0046293333172798155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,7168,0.021900800863901775
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,16384,0.027115732431411743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,6144,0.014968533317248026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,6144,0.020760534207026164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,65536,0.09057493209838867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,6144,0.013559466600418091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,10240,0.01813439925511678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,5120,0.013460266590118408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,5120,0.019577600558598838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,7168,0.014946132898330688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,4096,0.012449066837628682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,4096,0.01869973341623942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,4096,0.010546132922172546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,3584,0.010839466253916423
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,3584,0.018004266421000163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,3072,0.010045866171518963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,12288,0.02168853282928467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,3072,0.017707733313242595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,2560,0.008144000172615051
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,2560,0.01690559983253479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,2560,0.00857919951279958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,2048,0.007129600147406261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,2048,0.015346133708953857
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,2048,0.007147733370463054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,1536,0.006252799928188324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,3584,0.01039466659228007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,3072,0.009195733070373534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,1536,0.015265066425005594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,1536,0.006607999900976817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,1024,0.004418133199214936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,5120,0.01227839986483256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,1024,0.014974932869275412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,768,0.003908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,768,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,512,0.0036992001036802924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,512,0.01467626690864563
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,256,0.0032266666491826378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,1024,0.005706666906674703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,256,0.014237866799036662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,128,0.0030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,128,0.014071466525395713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,768,0.005468800167242686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,128,0.004555733501911163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,64,0.002713600049416224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,64,0.014111999670664468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3584,32,0.0027701333165168762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3584,32,0.014227199554443359
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,65536,0.10268692970275879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,65536,0.06289066473642985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,16384,0.029816534121831256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,256,0.00488319993019104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,16384,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,16384,0.025596799453099568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,12288,0.0252895991007487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,12288,0.02531519929567973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,10240,0.02013333241144816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,10240,0.023578667640686037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,8192,0.01790293256441752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3584,512,0.005298133194446564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,8192,0.021922133366266885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,65536,0.08494933446248373
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,7168,0.01548373301823934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,7168,0.021220266819000244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,6144,0.014008532961209616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,6144,0.019973333676656088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,5120,0.013053866227467856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,5120,0.019234132766723634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,12288,0.020430932442347206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,4096,0.011099732915560405
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,10240,0.017908267180124917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,8192,0.015410133202870688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,4096,0.01830079952875773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,3584,0.010282666484514872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,3584,0.018028799692789713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,5120,0.011669333775838215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,3072,0.00931946635246277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,3072,0.017358932892481485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,2560,0.007733333110809326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,2560,0.016076800227165223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,2048,0.006805333495140076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,7168,0.01421440045038859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,2048,0.015530666708946228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,4096,0.010360532999038696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,1536,0.005975466469923655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,6144,0.012950399518013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,1536,0.015270400047302245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,3584,0.00988266666730245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,1024,0.004327466587225596
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,1024,0.014949333667755128
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,1024,0.005724800129731497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,768,0.004101333270470301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,768,0.014630400141080222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,3072,0.008999466896057129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,512,0.003565866748491923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,2560,0.007846400141716003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,2048,0.00689279983441035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,512,0.014569600423177084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,256,0.0031370667119820913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,256,0.014484266440073649
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,128,0.002885333448648453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,768,0.005376000205675761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,128,0.01406719982624054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,128,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,32,0.014223999778429666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,64,0.002765866617361705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,3072,64,0.01421119968096415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,3072,32,0.002644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,1536,0.006592000027497609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,65536,0.08482453028361002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,65536,0.05602026780446371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,65536,0.08515840371449789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,512,0.005172266562779745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,16384,0.0260863999525706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,16384,0.025883734226226807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,3072,256,0.004775466521581014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,12288,0.019939200083414713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,12288,0.022977066040039063
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,10240,0.017515732844670614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,10240,0.021499733130137123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,8192,0.015032533804575601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,8192,0.020523732900619505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,16384,0.025437867641448973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,7168,0.013819733262062072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,7168,0.019741866985956827
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,6144,0.012294399738311767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,6144,0.01996586720148722
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,6144,0.012806399663289388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,10240,0.017624533176422118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,5120,0.011001599828402202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,5120,0.018943999210993448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,5120,0.011618133385976155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,4096,0.009849599997202555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,12288,0.020406399170557657
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,4096,0.018127999703089395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,8192,0.015373866756757101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,7168,0.014009599884351095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,3584,0.009101866682370504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,3584,0.017240534226099648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,3584,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,3072,0.008598400155703227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,3072,0.016074666380882265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,3072,0.00841813286145528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,2560,0.007378133138020833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,2560,0.016266666849454246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,2560,0.007893333335717519
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,2048,0.006733866532643636
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,2048,0.015717333555221556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,2048,0.006850133339564006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,1536,0.0049781332413355505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,1536,0.015256533026695251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,1536,0.006411733229955037
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,1024,0.004127999891837438
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,1024,0.014921599626541137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,768,0.0037418665985266366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,4096,0.010141866405804952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,768,0.014851199587186179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,768,0.005352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,512,0.0035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,512,0.014791466792424521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,256,0.00308693324526151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,256,0.014427733421325684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,128,0.002765866617361705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,128,0.014153599739074707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,64,0.0026687999566396077
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,64,0.0143477330605189
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2560,32,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2560,32,0.014327466487884521
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,65536,0.06805973052978516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,65536,0.04732480049133301
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,65536,0.08388799826304118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,16384,0.020754132668177286
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,16384,0.02392746607462565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,12288,0.017930666605631508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,512,0.0051701332132021586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,12288,0.021266132593154907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,10240,0.01576533317565918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,256,0.0049002667268117275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,128,0.004610133171081543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,10240,0.020619734128316244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,8192,0.013577600320180258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2560,1024,0.005836800237496694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,8192,0.01978773276011149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,7168,0.012584533294041952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,7168,0.018920532862345376
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,16384,0.024971733490626015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,6144,0.011266133189201355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,6144,0.01846826672554016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,12288,0.020020266373952232
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,5120,0.010168533523877461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,5120,0.01975359916687012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,5120,0.011342933773994446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,4096,0.008992000420888265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,8192,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,4096,0.017863466342290243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,4096,0.009635200103123982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,3584,0.008295466502507527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,3584,0.01780479947725932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,3584,0.008973866701126099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,3072,0.007768533130486806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,3072,0.01605013310909271
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,3072,0.008141866823037466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,2560,0.007113599777221679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,2560,0.015844266613324484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,2560,0.007577600081761678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,2048,0.006200533111890157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,6144,0.012495999534924824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,2048,0.015723733107248943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,2048,0.006759466727574666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,10240,0.017602133750915527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,1536,0.0046623999873797095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,1536,0.015287466843922935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,1536,0.006295466423034668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,1024,0.003950933367013932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,1024,0.014968533317248026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,7168,0.013927466670672097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,512,0.014586666226387024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,768,0.0038719999293486277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,256,0.0031690667072931922
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,768,0.014748799800872802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,768,0.005363200108210246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,512,0.0034133332471052804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,512,0.005049600203831991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,256,0.01432960033416748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,128,0.0029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,32,0.0026869334280490874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,128,0.014178133010864258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,2048,64,0.00275093341867129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,64,0.013985066612561544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,2048,32,0.01423679987589518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,65536,0.0526965339978536
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,65536,0.04033493200937907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,1024,0.005596800148487091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,16384,0.017064533631006875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,16384,0.02186773419380188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,12288,0.019925334056218467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,16384,0.024780799945195518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,12288,0.013583999872207642
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,10240,0.012356266379356384
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,10240,0.019406932592391967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,8192,0.010781866312026978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,128,0.004487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,8192,0.018579200903574625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,7168,0.009954133629798889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,7168,0.01839359998703003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,7168,0.013926399747530618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,6144,0.009191466371218364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,65536,0.08442773024241129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,6144,0.017646932601928712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,5120,0.008541867136955261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,12288,0.019796266158421835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,2048,256,0.00488319993019104
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,5120,0.019803732633590698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,5120,0.010858666896820069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,10240,0.017378133535385133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,4096,0.007921066880226136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,4096,0.0195413331190745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,4096,0.009274666508038838
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,8192,0.014893866578737893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,3584,0.007683200140794118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,3584,0.01835839947064718
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,3584,0.008922666311264038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,3072,0.0071487997968991595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,3072,0.017899733781814576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,2560,0.008601599931716919
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,2560,0.01723946730295817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,2048,0.00804906686147054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,2048,0.01660480002562205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,1536,0.00631466656923294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,1536,0.016025599837303162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,1536,0.006363733112812043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,1024,0.005220266679922739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,1024,0.015358933806419372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,768,0.004584533472855886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,768,0.015014400084813436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,6144,0.012116266290346782
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,512,0.003909333298603693
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,512,0.014779733618100485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,2560,0.007580799857775371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,512,0.005092266698678335
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,2048,0.006788266698519389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,256,0.0032640000184377036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,256,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,128,0.0030037333567937215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,1024,0.005619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,128,0.014079999923706055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,768,0.005211733281612396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,3072,0.00807360013326009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,64,0.0028181334336598715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,64,0.014288000265757241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1536,32,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1536,32,0.014315733313560485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,65536,0.035903998215993244
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,256,0.004714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,65536,0.03206826647122701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,16384,0.01390506625175476
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,16384,0.020140800873438516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,12288,0.011238400141398113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1536,128,0.004621866842110952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,12288,0.01880319913228353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,10240,0.010401067137718201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,10240,0.018201599518458046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,10240,0.017107200622558594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,8192,0.009198932846387228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,8192,0.01727786660194397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,7168,0.008332799871762593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,7168,0.017435733477274576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,65536,0.0840778668721517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,6144,0.007738666733105977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,6144,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,5120,0.007619200150171916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,5120,0.023856000105539957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,4096,0.006392533580462138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,4096,0.02327466607093811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,3584,0.006676266590754191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,12288,0.019784533977508546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,3584,0.02248959938685099
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,3584,0.008731733759244282
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,3072,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,3072,0.021820799509684245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,3072,0.008020266890525818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,2560,0.013198933005332947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,2560,0.0204693337281545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,2560,0.007446399827798207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,8192,0.014353066682815552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,2048,0.011851732929547627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,7168,0.013012267152468362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,2048,0.019426133235295615
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,6144,0.011619200309117634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,2048,0.006714666883150737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,16384,0.02474986712137858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,5120,0.010386133193969726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,1536,0.009804800152778625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,4096,0.009224533041318258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,1536,0.018193066120147705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,1024,0.007765333354473114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,1024,0.016871466239293417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,768,0.0067007998625437425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,768,0.01609599987665812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,512,0.005176533261934916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,512,0.015436800320943198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,512,0.004957866668701172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,256,0.003996799886226654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,256,0.01495253344376882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,256,0.004741333425045013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,128,0.0033151999115943907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,128,0.014682666460673014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,64,0.003013333429892858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,64,0.014711466431617738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,1024,32,0.003048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,1024,32,0.014497066537539164
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,65536,0.029256532589594524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,65536,0.028449066480000812
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,16384,0.01111253301302592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,768,0.0051701332132021586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,16384,0.019074134031931558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,16384,0.024555732806523643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,12288,0.00925973355770111
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,12288,0.017544533809026083
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,128,0.0046015997727712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,10240,0.008661333719889324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,10240,0.017847466468811034
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,1536,0.006211199859778086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,10240,0.016999467213948568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,8192,0.00811839997768402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,8192,0.01709866722424825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,1024,1024,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,7168,0.007492266595363617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,7168,0.017293866475423178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,6144,0.006888533135255177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,65536,0.0833791971206665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,6144,0.01728960076967875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,5120,0.0066431999206542965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,5120,0.020102399587631225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,5120,0.010420266787211101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,4096,0.006109866499900818
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,4096,0.018652800718943277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,4096,0.009266133109728496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,3584,0.006419200201829274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,3584,0.018256000677744546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,3072,0.006020266811052958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,3072,0.017321600516637167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,3072,0.00798399994770686
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,2560,0.008666666348775227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,8192,0.014389333128929139
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,2560,0.01686613361040751
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,2560,0.007613866527875264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,2048,0.0069930667678515124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,7168,0.012982400258382163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,2048,0.01650773286819458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,6144,0.01157866617043813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,1536,0.006117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,1536,0.015811199943224587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,12288,0.019489065806070963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,1536,0.006306133170922597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,1024,0.004965333143870035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,1024,0.015380266308784484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,3584,0.00886079967021942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,768,0.0044821331898371375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,768,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,768,0.0052255998055140175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,512,0.003841066608826319
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,2048,0.006715733309586842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,512,0.014661332964897156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,256,0.0032778667906920114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,256,0.014586666226387024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,128,0.0029898665845394133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,128,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,128,0.004523733258247375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,64,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,64,0.014454399545987448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,768,32,0.0028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,1024,0.005569066603978475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,768,32,0.014407466848691305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,65536,0.02076693375905355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,65536,0.02367786765098572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,65536,0.08343893686930338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,16384,0.008996267120043437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,12288,0.01685973405838013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,16384,0.017056000232696534
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,16384,0.024600533644358318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,12288,0.007944533228874206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,256,0.004655999938646952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,10240,0.0075989335775375364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,10240,0.0171615997950236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,8192,0.006771199901898702
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,8192,0.017195733388264973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,7168,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,7168,0.01738026738166809
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,6144,0.0058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,6144,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,768,512,0.005154133339722951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,5120,0.006226133306821187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,5120,0.017691733439763387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,4096,0.005960533519585928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,4096,0.0172437330087026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,4096,0.009179733196894328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,3584,0.0064416001240412395
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,3584,0.016697599490483602
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,3072,0.005981866518656413
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,3072,0.016267733772595723
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,10240,0.01651306649049123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,2560,0.006185600161552429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,2560,0.015987199544906617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,8192,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,2048,0.005486933390299479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,7168,0.012940800189971924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,2048,0.01569493313630422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,2048,0.006791466474533081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,1536,0.0047882666190465295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,6144,0.011570133765538533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,1536,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,1024,0.004223999877770742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,12288,0.019273600975672402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,1024,0.005612800021966299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,1024,0.01507306694984436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,768,0.0038431999584039056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,768,0.01469013293584188
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,512,0.003499733408292135
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,512,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,256,0.0030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,3072,0.007973333199818928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,256,0.014382933576901754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,128,0.002773333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,128,0.014084266622861228
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,2560,0.007542400062084198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,64,0.0027306665976842242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,5120,0.010466133554776508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,64,0.01406719982624054
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,3584,0.00879039963086446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,1536,0.006203733384609222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,512,32,0.0027434666951497394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,512,32,0.014230400323867798
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,65536,0.013827199737230936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,768,0.00537066658337911
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,65536,0.020574933290481566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,16384,0.006638933221499126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,16384,0.01710933248202006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,256,0.004593066871166229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,12288,0.006203733384609222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,128,0.004481066763401031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,12288,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,12288,0.018795732657114664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,10240,0.006154666841030121
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,10240,0.017271467049916587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,10240,0.016245333353678386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,8192,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,8192,0.017106133699417114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,512,512,0.005061333378156027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,7168,0.005932799975077311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,7168,0.017474132776260375
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,16384,0.023798400163650514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,6144,0.005901866654555003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,6144,0.016852267583211265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,5120,0.006236800054709116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,5120,0.017925333976745606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,4096,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,65536,0.08336640199025472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,8192,0.013893333077430726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,4096,0.01710933248202006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,4096,0.009239466985066731
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,3584,0.006445866823196411
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,3584,0.016598400473594666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,3072,0.00594346672296524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,3072,0.016141866644223533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,3072,0.008026666442553202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,2560,0.00603413333495458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,2560,0.015871999661127727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,2560,0.007527466615041096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,2048,0.005446400245030721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,7168,0.012813867131868998
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,2048,0.015598932902018229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,6144,0.011504000425338745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,2048,0.00676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,1536,0.004647466540336609
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,5120,0.010311466455459595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,1536,0.015586133797963461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,1024,0.004013866682847341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,1024,0.014939733346303306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,768,0.0035829332967599234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,768,0.014696533481280008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,3584,0.008683733145395915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,768,0.005226666728655497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,512,0.0032885332902272543
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,512,0.01455573340257009
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,256,0.002996266633272171
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,256,0.014396799604098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,128,0.002703999976317088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,1536,0.006291200220584869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,128,0.014138666788736978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,128,0.004494933287302653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,1024,0.005539200206597646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,64,0.0026069333155949908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,64,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,65536,0.018067200978597008
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,256,32,0.0026133333643277483
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,16384,0.005767466624577841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,256,32,0.01415786643822988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,65536,0.01034773290157318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,512,0.004794666667779287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,16384,0.017491199572881064
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,256,256,0.0046528001626332605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,16384,0.023763199647267662
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,12288,0.005836800237496694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,12288,0.017043199141820273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,10240,0.005959466596444448
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,10240,0.017038933436075845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,8192,0.0058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,8192,0.016780799627304076
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,8192,0.013942399621009826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,7168,0.0057546665271123254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,7168,0.017349332571029663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,65536,0.08204800287882487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,6144,0.005553066730499268
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,6144,0.017081600427627564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,12288,0.018739199638366698
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,5120,0.00602346658706665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,5120,0.01732906699180603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,5120,0.010273067156473796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,4096,0.005751466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,4096,0.016603733102480568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,3584,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,3584,0.016119466225306193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,3072,0.005696000158786773
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,10240,0.016365866859753928
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,3072,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,3072,0.007870933413505555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,2560,0.005746133128801982
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,2560,0.015591466426849365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,7168,0.012755200266838074
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,2048,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,1536,0.004572799801826477
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,2048,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,6144,0.011447466413180033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,1536,0.015003732840220132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,4096,0.009065600236256917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,3584,0.008769067128499348
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,1536,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,1024,0.003946666667858759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,768,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,1024,0.015033599734306336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,1024,0.005464533468087515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,512,0.014618666966756186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,768,0.0035978667438030243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,768,0.0051360001166661584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,512,0.0032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,2560,0.0074421331286430355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,512,0.004744533201058706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,256,0.002995199958483378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,256,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,2048,0.006676266590754191
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,128,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,128,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,128,0.004473599791526795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,64,0.0026079999903837843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,64,0.01420906682809194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,128,32,0.0027253332237402597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,128,32,0.014189866185188294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,65536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,65536,0.017990400393803917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,16384,0.005639466643333435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,16384,0.01694719990094503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,12288,0.0056970665852228795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,12288,0.016819200913111367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,10240,0.005890133480230967
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,2,128,256,0.004609066744645437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,10240,0.017166932423909508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,8192,0.005682133138179779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,8192,0.01665173371632894
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,7168,0.005674666663010915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,7168,0.01714026729265849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,6144,0.0054624001185099285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,6144,0.016845866044362386
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,5120,0.006028800209363302
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,5120,0.017271467049916587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,4096,0.005445333321889242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,4096,0.01649066706498464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,3584,0.006046933432420095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,3584,0.016320000092188515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,3072,0.005644799768924713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,3072,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,2560,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,2560,0.01565120021502177
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,2048,0.004957866668701172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,2048,0.015236266454060874
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,1536,0.004390400151411692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,1536,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,1024,0.0037461332976818085
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,1024,0.014829867084821067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,768,0.0034634667138258614
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,768,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,512,0.003201066702604294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,512,0.014434132973353067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,256,0.0028917332490285236
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,256,0.014376533031463624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,128,0.0027093333502610523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,128,0.014082133769989014
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,64,0.002585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,64,0.014349866906801859
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,64,32,0.0026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,64,32,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,65536,0.007351466516653697
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,65536,0.018049067258834837
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,16384,0.005453866720199585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,16384,0.016717867056528727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,12288,0.005677866439024607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,12288,0.016850133736928306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,6144,0.005469866593678792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,10240,0.005703466633955637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,6144,0.017008000612258913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,10240,0.016976000865300496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,8192,0.005748266478379568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,8192,0.01660480002562205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,7168,0.00553599993387858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,7168,0.0172106663386027
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,5120,0.00595413347085317
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,4096,0.005515733361244201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,5120,0.017065600554148356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,4096,0.016454399625460307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,3584,0.0059914668401082356
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,3584,0.016546133160591125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,3072,0.0055061335364977515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,3072,0.01578986644744873
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,2560,0.005739733576774597
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,2560,0.01581653356552124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,2048,0.005049600203831991
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,2048,0.015357866883277893
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,1536,0.004390400151411692
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,1536,0.015051733454068503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,512,0.014308266838391624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,1024,0.003818666686614355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,256,0.014255999525388082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,1024,0.014898133277893067
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,128,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,768,0.003537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,768,0.014716800053914389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,512,0.003222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,256,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,128,0.0026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,64,0.0025642665723959604
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,64,0.014074666301409402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,2,32,32,0.0027114666998386385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,2,32,32,0.014151466886202493
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,16384,0.5204245249430339
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,16384,0.27319466272989906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,12288,0.39058561325073243
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,12288,0.20852479934692383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,12288,0.1938378651936849
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,10240,0.3363135973612467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,10240,0.20120213826497396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,8192,0.26197546323140464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,8192,0.1445312023162842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,8192,0.13085333506266278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,7168,0.23540266354878744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,16384,0.2568064053853353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,7168,0.12737706502278645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,6144,0.1084437370300293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,7168,0.12607786655426026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,6144,0.19646506309509276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,6144,0.11379733085632324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,5120,0.16452479362487793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,5120,0.09555413722991943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,5120,0.084989865620931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,4096,0.13275307019551594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,4096,0.079912535349528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,4096,0.0682207981745402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,3584,0.11554559866587322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,10240,0.16179092725118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,3584,0.07889280319213868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,3072,0.1027722676595052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,3072,0.06380053361256918
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,2560,0.08383146921793619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,2560,0.05523093144098917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,2048,0.0678218682607015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,2048,0.04835306803385417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,2048,0.03660693168640137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,1536,0.05159146785736084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,1536,0.03939626614252727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,1024,0.037384533882141115
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,1024,0.03123093247413635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,768,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,768,0.02744320034980774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,768,0.01834239959716797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,512,0.01955733299255371
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,512,0.022999467452367146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,512,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,256,0.011149866382280986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,256,0.01920106609662374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,128,0.007379200061162312
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,2560,0.04912853240966797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,128,0.016567466656366985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,64,0.004248533149560293
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,64,0.01611306667327881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,65536,32,0.004307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,65536,32,0.01594239970048269
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,3584,0.0654698650042216
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,1536,0.029205334186553956
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,16384,0.12465919653574627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,1024,0.021572266022364298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,3072,0.05285973151524862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,65536,0.5254645347595215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,65536,0.2752426783243815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,16384,0.07605439821879069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,16384,0.06882987022399903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,12288,0.09480746587117514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,12288,0.06770880222320556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,10240,0.07961066563924155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,10240,0.05379840135574341
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,256,0.010790399710337321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,8192,0.06389013528823853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,8192,0.046284798781077066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,7168,0.05675413211186727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,7168,0.04235733350118001
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,7168,0.03298453291257222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,6144,0.04907093445460002
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,6144,0.03848746617635091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,5120,0.04131946563720703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,5120,0.03464639981587728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,5120,0.02518933415412903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,4096,0.034137598673502606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,4096,0.030697600046793623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,12288,0.0572703997294108
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,4096,0.0213045338789622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,3584,0.029948800802230835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,3584,0.028742400805155437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,65536,128,0.008754133184750875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,3072,0.026315732796986895
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,3072,0.026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,65536,0.26053226788838707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,3072,0.017484800020853678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,2560,0.022525866826375328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,2560,0.025093332926432295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,2048,0.01881493330001831
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,8192,0.03721706469853719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,2048,0.02244159976641337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,2048,0.013353600104649862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,6144,0.029024000962575274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,1536,0.015011200308799743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,10240,0.04479786554972331
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,1536,0.020566399892171225
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,1024,0.009187199672063192
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,1024,0.011001599828402202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,1024,0.018797866503397622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,3584,0.019373865922292073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,768,0.008942932883898417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,2560,0.01664746701717377
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,768,0.017822933197021485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,512,0.007106133302052816
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,512,0.015258666872978211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,256,0.0040394666294256846
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,256,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,1536,0.011394133170445759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,128,0.0034282666941483817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,128,0.014711466431617738
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,64,0.003083733220895131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,64,0.014510933558146158
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,16384,32,0.0030271999537944795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,16384,32,0.014818132917086283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,16384,0.09779307047526041
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,65536,0.3911285400390625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,65536,0.20224852561950685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,16384,0.06278613408406576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,16384,0.05793600082397461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,12288,0.08321812947591146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,512,0.0064064001043637585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,12288,0.05375039974848429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,10240,0.06328426599502564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,10240,0.04627093474070231
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,256,0.0053269331653912864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,8192,0.05269546508789062
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,8192,0.04064746697743733
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,128,0.004834133386611939
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,7168,0.04674559831619263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,7168,0.03742080132166545
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,16384,768,0.008293333152929943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,6144,0.04055466651916504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,6144,0.03389546473821004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,5120,0.034030934174855546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,5120,0.03099626700083415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,4096,0.02827306588490804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,65536,0.2063360055287679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,4096,0.02754986683527629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,12288,0.04502933422724406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,4096,0.01945706605911255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,3584,0.0248906672000885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,3584,0.026156800985336303
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,3584,0.01694613297780355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,10240,0.037385598818461103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,3072,0.02187839945157369
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,3072,0.024296534061431885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,8192,0.03030719955762227
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,3072,0.014787200093269347
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,2560,0.019052799542744955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,2560,0.022737065951029457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,7168,0.02765333255132039
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,2048,0.01569386621316274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,2048,0.021203200022379555
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,5120,0.021205333868662517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,2048,0.011362133423487346
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,1536,0.012573867042859396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,1536,0.019452800353368126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,1536,0.00999679962793986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,1024,0.009429333607355754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,1024,0.01758400003115336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,2560,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,6144,0.024580266078313193
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,768,0.0077909335494041445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,768,0.015546666582425437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,768,0.007281066477298736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,512,0.006258133550484974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,512,0.015361066659291586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,256,0.0037952000896135964
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,256,0.014823466539382935
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,128,0.003340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,1024,0.008226133386294047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,128,0.014568533500035605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,32,0.0029674666623274487
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,12288,64,0.003078400095303853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,64,0.014620799819628397
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,12288,32,0.01455466647942861
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,512,0.005633066594600678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,65536,0.18021012941996256
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,65536,0.33237012227376306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,256,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,16384,0.0883413314819336
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,16384,0.056728533903757726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,12288,128,0.004640000065167745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,12288,0.06617173353830973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,12288,0.05261653264363607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,12288,0.03686720132827759
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,10240,0.05697280168533325
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,10240,0.03987840016682943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,16384,0.04740266799926758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,65536,0.1765984058380127
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,8192,0.04585173527399699
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,8192,0.03497386773427327
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,8192,0.026311467091242473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,7168,0.038022398948669434
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,7168,0.0323093334833781
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,10240,0.03243733247121175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,6144,0.037006934483846024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,6144,0.029364265998204547
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,6144,0.021363200743993123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,5120,0.02793813347816467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,5120,0.02753173311551412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,4096,0.023212800423304238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,7168,0.02373866637547811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,4096,0.024859732389450072
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,3584,0.020653865734736123
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,3584,0.023833600680033366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,3584,0.015155200163523355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,3072,0.018449066082636516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,3072,0.022072533766428627
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,3072,0.012919466694196066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,2560,0.015687466661135355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,5120,0.019074134031931558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,2560,0.02086826761563619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,2048,0.013238400220870972
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,4096,0.01569493313630422
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,2048,0.019802665710449217
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,1536,0.01066986620426178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,1536,0.018458666404088338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,1024,0.00801386684179306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,1024,0.017366399367650352
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,768,0.006914133330186208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,768,0.015145599842071533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,768,0.006287999947865804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,512,0.0054613331953684485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,512,0.015210666259129844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,2560,0.01216319998105367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,256,0.003521066655715307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,256,0.014749866724014283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,256,0.0049098665515581764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,128,0.003222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,128,0.014358400305112203
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,2048,0.010420266787211101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,64,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,1536,0.009202133615811665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,64,0.014249599973360696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,1024,0.007529599964618683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,10240,32,0.0028480000793933867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,10240,32,0.014509866635004679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,65536,0.25530667304992677
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,65536,0.1416437307993571
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,16384,0.06579413414001464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,512,0.00574186642964681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,10240,128,0.0045962666471799215
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,16384,0.04628159999847412
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,16384,0.04040000041325887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,12288,0.05719253222147623
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,12288,0.03907626469930013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,12288,0.030826665957768756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,10240,0.04282453457514445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,10240,0.03497813145319621
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,65536,0.1423360029856364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,8192,0.034653866291046144
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,8192,0.0315008004506429
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,8192,0.022207999229431154
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,7168,0.03073599934577942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,7168,0.02899199922879537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,7168,0.020616533358891805
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,6144,0.029546666145324706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,6144,0.027159466346104937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,5120,0.023117866118748984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,5120,0.024966400861740113
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,4096,0.019003732999165853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,4096,0.022833067178726196
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,10240,0.02582826614379883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,3584,0.01713386575380961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,3584,0.02207039992014567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,3072,0.015095466375350952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,3072,0.020719999074935914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,3072,0.011742933591206869
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,6144,0.018639999628067016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,2560,0.013261866569519044
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,2560,0.019828265905380248
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,2048,0.01123520036538442
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,2048,0.018959999084472656
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,1536,0.00918293297290802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,1536,0.017825067043304443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,1024,0.007225599884986877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,1024,0.015553067127863566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,1024,0.00653546651204427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,768,0.006142933170000712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,768,0.015340800086657206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,3584,0.0128330667813619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,5120,0.016148266196250916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,768,0.005770666897296906
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,512,0.004398933549722036
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,512,0.015057067076365152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,2560,0.01042133371035258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,512,0.005534933507442474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,256,0.0034527999659379324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,4096,0.01362986663977305
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,256,0.014396799604098
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,256,0.004990933338801066
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,1536,0.008433066805203756
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,128,0.003099733342727025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,128,0.014547200004259745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,64,0.003010133405526479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,64,0.014363732933998109
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,8192,32,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,8192,32,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,2048,0.009591466188430786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,65536,0.23067626953125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,65536,0.12703359921773275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,16384,0.060190931955973304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,65536,0.12662933667500814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,16384,0.04453866481781006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,12288,0.050714667638142905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,8192,128,0.004558933277924856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,12288,0.03686399857203166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,10240,0.039740800857543945
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,10240,0.03408960103988647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,10240,0.023617066939671836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,8192,0.03291093309720357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,8192,0.030431999762852983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,7168,0.02908160090446472
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,7168,0.02844799955685933
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,6144,0.02553279995918274
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,6144,0.02659519910812378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,6144,0.016696532567342125
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,5120,0.02215893268585205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,12288,0.028142933050791425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,5120,0.02371946573257446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,5120,0.014900267124176025
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,4096,0.019166932503382365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,4096,0.022292266289393105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,4096,0.012651733557383218
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,3584,0.017011199394861856
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,3584,0.021541333198547362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,3584,0.011875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,3072,0.015117866794268289
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,16384,0.036187732219696046
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,3072,0.02024959921836853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,7168,0.018465065956115724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,2560,0.012120532989501952
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,2560,0.019345066944758096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,2560,0.00998293360074361
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,2048,0.010499200224876404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,2048,0.01840106646219889
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,2048,0.00886079967021942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,1536,0.008627200126647949
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,1536,0.017452800273895265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,1024,0.006842666864395141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,1024,0.01515733301639557
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,8192,0.02059626579284668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,768,0.0058783998092015585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,768,0.015288533767064414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,512,0.0042026668787002565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,3072,0.010820266604423524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,512,0.01516480048497518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,256,0.014355199535687766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,256,0.0033962666988372804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,128,0.0031669333577156065
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,128,0.014181333780288696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,128,0.004689066608746847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,64,0.0029056000212828318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,1536,0.008076799909273784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,64,0.014286933342615762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,7168,32,0.0028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,1024,0.006305066744486491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,7168,32,0.014629333217938741
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,256,0.004962133367856344
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,65536,0.19624212582906086
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,65536,0.10906986395517985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,16384,0.05147093137105306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,65536,0.11177173455556233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,16384,0.042166399955749514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,16384,0.031981867551803586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,768,0.005938133100668589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,12288,0.04684266646703084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,7168,512,0.005590400099754334
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,12288,0.03507839838663737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,12288,0.025011199712753295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,10240,0.036149334907531736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,10240,0.03227733373641968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,10240,0.02214933236440023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,8192,0.027749333779017133
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,8192,0.027811199426651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,8192,0.01898026665051778
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,7168,0.026800000667572023
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,7168,0.026923733949661254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,6144,0.021792000532150267
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,6144,0.02434239983558655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,5120,0.018499199549357095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,5120,0.0233130673567454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,5120,0.013895466923713684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,4096,0.016847999890645345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,4096,0.021810134251912437
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,4096,0.011821867028872172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,3584,0.014106667041778565
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,3584,0.020873600244522096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,7168,0.017004799842834473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,3584,0.011045333743095399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,3072,0.013464533289273582
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,3072,0.020045866568883262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,3072,0.010319999853769938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,6144,0.01539413332939148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,2560,0.011106133460998535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,2560,0.01909653345743815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,2048,0.010006399949391682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,2048,0.01785599986712138
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,2048,0.008649599552154542
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,1536,0.008262399832407634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,1536,0.01685439944267273
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,1536,0.0075103998184204105
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,1024,0.006397866706053417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,1024,0.015400532881418863
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,768,0.005347200234731038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,768,0.015425067146619162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,512,0.003957333415746689
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,2560,0.009528533617655436
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,512,0.014667733510335287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,256,0.0033333333830038703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,256,0.014845866958300272
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,256,0.0048885335524876915
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,128,0.00311253344019254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,128,0.01430293321609497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,64,0.0028351999819278715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,64,0.01418346663316091
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,768,0.005498666564623515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,6144,32,0.002828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,6144,32,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,512,0.0051146666208903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,65536,0.16294506390889485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,65536,0.09610880215962728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,65536,0.0939296007156372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,16384,0.047365331649780275
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,16384,0.03584853410720825
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,1024,0.0061258668700853985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,16384,0.02897599935531616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,12288,0.03962133328119914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,12288,0.030624000231424968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,10240,0.03078826665878296
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,10240,0.02783573269844055
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,8192,0.025550933678944905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,8192,0.025628799200057985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,6144,128,0.004619733492533366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,7168,0.023077332973480226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,7168,0.023873066902160643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,7168,0.015863466262817382
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,6144,0.01811520059903463
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,6144,0.022667733828226726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,12288,0.023601067066192628
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,6144,0.01404159963130951
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,5120,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,5120,0.021306665738423665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,8192,0.017196800311406454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,4096,0.013725866874059042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,4096,0.020139733950297035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,3584,0.012036266922950744
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,3584,0.019454934199651084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,10240,0.020568533738454183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,3072,0.010880000392595927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,3072,0.01872319976488749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,5120,0.012730666995048523
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,3072,0.00999679962793986
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,2560,0.00988159974416097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,4096,0.011098666985829671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,2560,0.01829973260561625
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,3584,0.010341333349545796
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,2048,0.008431999882062276
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,2048,0.01733120083808899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,1536,0.007281066477298736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,1536,0.015590399503707886
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,1024,0.005520000060399374
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,1024,0.015241600076357522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,1536,0.006793599824110668
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,1024,0.006106666723887126
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,768,0.0042912001411120095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,2560,0.009242666761080424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,768,0.014947199821472168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,512,0.003718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,512,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,2048,0.008242133259773254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,256,0.0032266666491826378
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,128,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,256,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,128,0.003115733216206233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,128,0.004712533454100291
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,64,0.0027274665733178455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,64,0.014447999993960061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,5120,32,0.0028160000840822858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,65536,0.12722986539204914
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,5120,32,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,65536,0.07710506916046142
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,16384,0.0359007994333903
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,16384,0.03189439972241719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,768,0.005683200061321258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,12288,0.027841067314147948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,12288,0.02995733420054118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,256,0.004756266872088114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,12288,0.02164586583773295
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,10240,0.02582293351491292
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,10240,0.025649066766103106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,8192,0.021393066644668578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,8192,0.023729066054026283
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,65536,0.09085226853688558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,7168,0.022341332832972207
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,8192,0.015947733322779337
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,7168,0.01902079979578654
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,16384,0.025886933008829754
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,7168,0.014903466900189719
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,5120,512,0.005369600156943003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,6144,0.016721065839131674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,6144,0.021241599321365358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,5120,0.014908799529075622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,5120,0.02030506730079651
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,5120,0.012122666835784912
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,10240,0.018923733631769815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,4096,0.012692266702651977
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,4096,0.019127466281255088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,3584,0.01170133352279663
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,3584,0.018603734175364175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,3584,0.010403199990590414
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,3072,0.01053439974784851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,3072,0.017947733402252197
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,2560,0.00860800047715505
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,6144,0.013062399625778199
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,2560,0.017650133371353148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,2560,0.008804266651471455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,2048,0.007578666508197785
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,2048,0.01560426652431488
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,1536,0.00662720004717509
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,1536,0.015569067001342774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,1024,0.004668800036112467
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,4096,0.011018666625022887
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,1024,0.015050666530927024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,1024,0.005584000051021576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,768,0.003970133264859518
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,768,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,512,0.004994133114814758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,768,0.00550186683734258
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,512,0.003519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,512,0.014727466305096946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,3072,0.00939626693725586
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,256,0.0031776001056035364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,256,0.014306132992108664
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,1536,0.006481066842873891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,128,0.0029056000212828318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,128,0.014070399602254233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,128,0.004507733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,64,0.002829866607983907
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,2048,0.00754559983809789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,64,0.014452266693115234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,4096,32,0.0027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,4096,32,0.014113066593805948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,65536,0.11713173389434814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,65536,0.07419306437174479
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,65536,0.0904970645904541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,16384,0.03190400004386902
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,16384,0.030139732360839843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,4096,256,0.0049098665515581764
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,12288,0.025290666023890178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,12288,0.026779733101526898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,8192,0.018756266434987387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,10240,0.02218773365020752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,10240,0.02373866637547811
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,8192,0.022141865889231362
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,8192,0.015476266543070475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,7168,0.016979199647903443
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,16384,0.02567360003789266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,7168,0.02174506584803263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,7168,0.014891733725865683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,6144,0.01492586632569631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,6144,0.020680532852808634
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,6144,0.013543466726938883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,5120,0.0134442667166392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,5120,0.01975253423055013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,5120,0.012167466680208842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,4096,0.011901866396268208
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,4096,0.018609066804250084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,3584,0.010941867033640544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,10240,0.01880319913228353
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,3584,0.01861013372739156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,3072,0.009124267101287841
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,3584,0.010433066884676616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,3072,0.010166399677594503
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,12288,0.021679999430974324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,3072,0.017729065815607705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,2560,0.008083199958006541
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,2560,0.017026132345199584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,1536,0.01527253290017446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,2048,0.007097599903742473
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,2048,0.015624533096949259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,2048,0.0071936001380284624
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,1536,0.006380799909432728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,1536,0.006478933493296306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,1024,0.004279466470082601
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,4096,0.01087679962317149
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,2560,0.008191999793052674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,1024,0.015064533551534018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,768,0.004004266609748204
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,768,0.014659200112024942
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,512,0.003479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,256,0.014517333110173544
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,1024,0.00574186642964681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,512,0.014966400464375815
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,512,0.005013333261013031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,256,0.003209600100914637
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,768,0.0054613331953684485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,128,0.0028031999866167706
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,128,0.014262400070826211
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,64,0.00276799996693929
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,64,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3584,32,0.0027669332921504974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3584,32,0.014427733421325684
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,65536,0.10196800231933593
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,65536,0.06330453157424927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,16384,0.029838933547337847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,16384,0.028940800825754804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,12288,0.022539732853571574
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,12288,0.02484053373336792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,12288,0.02045546571413676
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,256,0.004870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,10240,0.02095359961191813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,10240,0.023196800549825033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,8192,0.01775146722793579
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3584,128,0.004456533491611481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,8192,0.02193386753400167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,8192,0.01543786625067393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,7168,0.016223999857902526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,65536,0.08486826419830322
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,7168,0.020777599016825358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,6144,0.014353066682815552
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,6144,0.020113066832224528
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,10240,0.017944532632827758
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,5120,0.01244586706161499
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,5120,0.019195733467737834
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,4096,0.011199999849001567
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,4096,0.018453333775202432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,4096,0.010327466328938802
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,3584,0.010384000341097514
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,3584,0.01813439925511678
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,3072,0.00946666697661082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,16384,0.02560746669769287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,3072,0.017257599035898845
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,7168,0.014215466380119324
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,2560,0.007750399907430013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,6144,0.012761599818865457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,2560,0.015734400351842245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,2048,0.0068245331446329755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,2048,0.01591146687666575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,2048,0.0072970668474833175
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,1536,0.005858133236567179
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,1536,0.015245866775512696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,1536,0.006402133405208588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,3584,0.009950932860374451
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,1024,0.004188799858093261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,1024,0.015065600474675497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,768,0.003955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,2560,0.008089600006739299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,5120,0.01188693344593048
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,768,0.014962133765220643
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,512,0.003626666714747747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,512,0.014637866616249084
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,512,0.005075199902057648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,256,0.003270400067170461
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,3072,0.009065600236256917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,256,0.01446613371372223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,128,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,1024,0.005622399846712748
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,128,0.014199466506640116
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,128,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,64,0.0028223998844623564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,64,0.014401066303253173
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,768,0.005294933418432872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,3072,32,0.002746666719516118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,3072,32,0.014153599739074707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,65536,0.0842357317606608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,65536,0.055301332473754884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,16384,0.025675733884175617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,16384,0.02581546703974406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,16384,0.025380265712738038
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,12288,0.02026559909184774
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,3072,256,0.004811733464399974
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,12288,0.022898133595784506
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,10240,0.01735360026359558
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,10240,0.02172373334566752
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,10240,0.01773866613705953
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,8192,0.015003732840220132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,8192,0.02044693430264791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,65536,0.08472959995269776
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,7168,0.013684266805648803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,7168,0.019717333714167277
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,7168,0.01418773333231608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,6144,0.012310399611790975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,6144,0.019107200702031455
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,5120,0.010920533537864685
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,5120,0.01907093326250712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,4096,0.009958400328954061
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,12288,0.02029119928677877
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,4096,0.01801066597302755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,8192,0.015347199638684592
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,4096,0.010314666231473287
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,3584,0.00909866690635681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,5120,0.011506133278210958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,3584,0.017640533049901326
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,3072,0.008684800068537394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,3072,0.015953066945075988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,2560,0.007401599983374278
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,2560,0.016081066926320393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,2048,0.006458666423956554
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,6144,0.012934399644533792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,2048,0.015434666474660238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,1536,0.005332266787687937
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,1536,0.015492266416549683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,3072,0.008322133123874665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,1024,0.004223999877770742
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,1024,0.015059199929237366
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,2560,0.008003200093905132
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,1024,0.005670399963855743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,768,0.003918933371702829
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,768,0.014922666549682616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,2048,0.006865066786607106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,768,0.00535999983549118
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,512,0.0035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,512,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,512,0.005151999990145365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,3584,0.009565866986910502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,256,0.003223466624816259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,1536,0.006484266618887584
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,256,0.014158933361371358
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,128,0.002883200099070867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,128,0.014317867159843446
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,128,0.004593066871166229
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,64,0.002799999962250392
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,64,0.014366933703422546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2560,32,0.00264533335963885
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2560,32,0.014085333546002707
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,65536,0.06616853475570679
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,65536,0.04765866597493489
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,16384,0.020617600282033285
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,16384,0.023512534300486245
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,16384,0.024885332584381102
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,12288,0.01783039967219035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,12288,0.02127466599146525
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2560,256,0.004644266764322917
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,12288,0.01999680002530416
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,10240,0.015610667069753012
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,10240,0.020827732483545938
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,8192,0.013511466979980468
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,8192,0.01983039975166321
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,7168,0.01193386713663737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,7168,0.019172267119089762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,7168,0.013987200458844504
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,6144,0.011191466450691223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,6144,0.0184661328792572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,5120,0.010121599833170573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,5120,0.0207914670308431
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,65536,0.08454399903615316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,4096,0.008739200234413148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,4096,0.01933120091756185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,10240,0.0175327996412913
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,4096,0.009905067086219788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,3584,0.008285866677761078
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,8192,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,3584,0.01783039967219035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,3072,0.0076799998680750535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,6144,0.0125408003727595
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,3072,0.01663040022055308
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,3072,0.008121599753697712
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,2560,0.007143466671307881
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,5120,0.011145599683125814
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,2560,0.01599360009034475
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,2048,0.006041599810123444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,2048,0.015689599514007568
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,2048,0.006876799960931141
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,1536,0.0046623999873797095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,1536,0.01520853340625763
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,3584,0.009126399954160053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,1024,0.004071466624736786
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,1024,0.014908799529075622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,2560,0.007649066547552745
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,768,0.0036501333117485045
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,768,0.0144896000623703
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,1536,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,768,0.005371733506520589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,512,0.0033386667569478357
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,512,0.014631467064221701
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,256,0.0030389333764712016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,256,0.014115200440088908
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,128,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,128,0.01420266628265381
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,128,0.004578133424123129
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,64,0.002657066782315572
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,64,0.014233600099881491
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,2048,32,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,2048,32,0.014197333653767904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,1024,0.0056639999151229855
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,65536,0.05329066514968872
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,512,0.004877866804599762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,65536,0.03964373270670573
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,16384,0.016588800152142844
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,16384,0.022016000747680665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,2048,256,0.00461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,10240,0.019310933351516724
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,12288,0.01400106648604075
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,12288,0.019665066401163736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,12288,0.019738666216532388
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,10240,0.012717866897583007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,8192,0.010880000392595927
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,8192,0.018683733542760213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,8192,0.014968533317248026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,7168,0.01020906666914622
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,65536,0.0843722661336263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,7168,0.018523732821146645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,6144,0.00929813285668691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,6144,0.017514665921529136
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,6144,0.012242133418718975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,5120,0.0086709330479304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,5120,0.020268799861272176
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,10240,0.017357865969340004
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,5120,0.010575999816258747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,4096,0.007864533364772797
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,16384,0.02485439976056417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,4096,0.019451733430226645
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,4096,0.00927786628405253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,3584,0.007575466732184092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,3584,0.018986666202545167
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,3072,0.006964266796906789
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,3072,0.018252799908320107
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,3072,0.00811839997768402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,7168,0.013784533739089966
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,2560,0.00937600036462148
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,2560,0.017194666465123496
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,3584,0.008761599659919739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,2048,0.008057599763075511
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,2048,0.01675093372662862
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,2048,0.006759466727574666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,1536,0.006694399813810985
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,1536,0.016173866391181946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,1024,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,768,0.014995200435320535
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,768,0.005273599922657013
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,1024,0.015416533748308817
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,768,0.004457599918047587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,2560,0.007500799993673961
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,512,0.0038122666378815973
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,512,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,256,0.0032000000278155005
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,256,0.014574933052062988
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,128,0.0030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,1536,0.00621973325808843
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,128,0.014337066809336343
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,128,0.004600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,512,0.0049685334165891016
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,64,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,1024,0.005513600011666616
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1536,256,0.004763733347256978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,64,0.014166399836540222
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1536,32,0.0028170667588710784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1536,32,0.014189866185188294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,65536,0.03575786749521891
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,65536,0.03189546664555867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,65536,0.08364480336507162
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,16384,0.013927466670672097
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,16384,0.019734400510787963
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,16384,0.02450986703236898
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,12288,0.011476266384124755
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,12288,0.018771199385325114
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,12288,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,10240,0.010284800330797832
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,10240,0.01834986607233683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,10240,0.017220266660054526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,8192,0.009143466750780743
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,8192,0.016966400543848674
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,7168,0.00844373305638631
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,7168,0.017605332533518474
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,6144,0.007645866771539052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,6144,0.017232000827789307
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,5120,0.007533866663773854
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,5120,0.025675733884175617
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,8192,0.014622933665911355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,4096,0.006498133142789205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,7168,0.013244799772898355
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,4096,0.024209066232045492
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,4096,0.009176533420880635
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,3584,0.0067114666104316715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,3584,0.023005867004394533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,3072,0.0060032000144322716
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,6144,0.011668266852696736
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,3072,0.02175253431002299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,2560,0.015377066532770791
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,5120,0.010386133193969726
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,2560,0.020350933074951172
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,2560,0.007613866527875264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,2048,0.012732799847920737
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,2048,0.01921280026435852
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,2048,0.00681386689345042
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,1536,0.010295466581980387
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,1536,0.017831466595331826
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,1024,0.007681066791216533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,3072,0.00781333347161611
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,1024,0.016643200318018594
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,512,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,3584,0.00886293351650238
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,768,0.006436266501744588
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,768,0.015761066476504007
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,256,0.014906666676203408
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,512,0.015275733669598899
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,512,0.004940799872080485
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,256,0.003953066716591517
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,1536,0.0062613333264986675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,128,0.0032074667513370516
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,128,0.014273066322008768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,64,0.002924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,768,0.005297066768010458
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,64,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,1024,32,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,1024,32,0.014607999722162882
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,65536,0.028999465703964233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,65536,0.02797120014826457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,256,0.004752000172932943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,16384,0.010949333508809406
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,16384,0.018675200144449868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,16384,0.024681599934895833
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,1024,0.0056991999348004665
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,12288,0.009302399555842082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,12288,0.01726400057474772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,12288,0.01926506757736206
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,10240,0.00877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,10240,0.01718399922053019
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,8192,0.007980800171693166
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,10240,0.016611199577649435
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,8192,0.01720106601715088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,7168,0.007493333518505096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,65536,0.08392319679260254
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,1024,128,0.0045973335703214015
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,7168,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,6144,0.006758399804433187
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,8192,0.014148267110188803
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,6144,0.017115734020868936
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,5120,0.006620799998442332
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,5120,0.020170666774113975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,5120,0.010397866368293762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,7168,0.012840533256530761
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,4096,0.0059797331690788266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,4096,0.019241599241892497
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,6144,0.0116266667842865
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,3584,0.006302933394908905
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,3584,0.01841920018196106
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,3072,0.006002133091290792
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,3072,0.017717333634694417
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,2560,0.008887466788291932
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,2560,0.017171200116475424
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,2560,0.007623466849327088
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,2048,0.007645866771539052
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,2048,0.016568533579508462
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,2048,0.006832000116507213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,1536,0.00622506688038508
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,1536,0.015801599621772765
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,4096,0.009253333012262981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,1024,0.005011199911435445
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,1024,0.01543786625067393
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,768,0.00456639975309372
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,3072,0.007853866616884867
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,768,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,512,0.003861333429813385
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,3584,0.008844799796740214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,1536,0.0062730665008227035
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,512,0.01479039986928304
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,512,0.005004799862702688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,256,0.0030975999931494398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,256,0.014598400394121806
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,128,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,1024,0.005550933380921682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,768,0.00518506666024526
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,128,0.014350933829943338
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,64,0.002733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,64,0.014410666624704995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,768,32,0.0027434666951497394
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,768,32,0.014264532923698425
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,65536,0.020568533738454183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,128,0.00462719996770223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,65536,0.023925334215164185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,16384,0.009078400333722432
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,16384,0.01760639945665995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,16384,0.024413865804672242
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,12288,0.007760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,768,256,0.004817066589991251
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,12288,0.016850133736928306
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,12288,0.01915839910507202
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,10240,0.00761706680059433
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,10240,0.01725226640701294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,8192,0.0066538666685422255
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,8192,0.016769067446390788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,8192,0.014028799533843995
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,7168,0.006340266764163971
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,7168,0.017435733477274576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,7168,0.012837333480517068
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,6144,0.005883733431498209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,5120,0.017972266674041747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,65536,0.08378346761067709
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,6144,0.01685653328895569
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,5120,0.006347733239332835
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,5120,0.010332799951235453
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,4096,0.005784533421198527
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,10240,0.016571733355522155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,4096,0.01716053287188212
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,3584,0.006459733347098033
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,3584,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,3072,0.0058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,3072,0.016203733285268147
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,6144,0.011534933249155681
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,3072,0.007957333326339721
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,2560,0.006166400015354156
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,4096,0.00915839970111847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,2560,0.015974400440851848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,2560,0.007495466868082683
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,2048,0.0054293334484100345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,3584,0.008689066767692566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,2048,0.015826132893562318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,1536,0.004747733473777771
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,1536,0.015314132968584696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,1536,0.0062613333264986675
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,1024,0.004141866664091746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,1024,0.014816000064214071
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,768,0.0037791999677817024
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,768,0.014753066500027976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,512,0.0035071998834609987
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,512,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,256,0.003032533327738444
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,256,0.014452266693115234
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,128,0.0028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,128,0.014133333166440328
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,64,0.002717866748571396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,64,0.01418773333231608
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,512,32,0.0026943999032179515
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,512,32,0.014046933253606161
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,65536,0.013894400000572205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,65536,0.02047146757443746
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,65536,0.08337706724802653
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,768,0.005099733173847198
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,2048,0.006774400174617767
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,16384,0.006515199939409892
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,16384,0.01695466637611389
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,512,0.005004799862702688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,128,0.004439466694990794
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,12288,0.006298666695753734
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,12288,0.016901334126790367
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,10240,0.006145066519578298
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,10240,0.017199999094009398
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,1024,0.005676800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,10240,0.016375466187795003
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,8192,0.006071466704209646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,8192,0.01691733400026957
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,8192,0.01394773324330648
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,7168,0.0059797331690788266
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,7168,0.01714986761411031
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,6144,0.005870933334032694
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,6144,0.016951467593510947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,5120,0.006311466793219249
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,512,256,0.004715733230113983
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,5120,0.01759679913520813
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,5120,0.01030293305714925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,4096,0.006039466460545858
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,16384,0.023772799968719484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,12288,0.018778665860493978
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,4096,0.016987733046213784
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,3584,0.006398933132489522
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,7168,0.0127893328666687
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,3584,0.016350932916005454
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,3584,0.00864746669928233
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,3072,0.005884799857934316
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,3072,0.016376533110936484
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,3072,0.007863466441631318
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,2560,0.006082133452097575
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,2560,0.015794133146603904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,4096,0.009091200431187947
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,2048,0.005299200117588043
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,6144,0.011709866921106975
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,2048,0.01567893326282501
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,2048,0.006631466746330261
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,1536,0.0046304002404212955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,1536,0.015145599842071533
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,1024,0.003939199944337209
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,1024,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,1024,0.005646933118502299
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,768,0.00367253323396047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,768,0.014737066626548768
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,2560,0.007406933108965556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,512,0.003399466723203659
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,512,0.014588800072669984
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,256,0.0030271999537944795
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,256,0.014265599846839904
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,256,0.004503466685612996
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,128,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,128,0.01411733329296112
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,1536,0.006278400123119354
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,64,0.0026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,64,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,256,32,0.002603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,256,32,0.014225066701571146
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,512,0.00489279975493749
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,65536,0.010499200224876404
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,128,0.004462933540344239
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,65536,0.017997866868972777
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,16384,0.005850666761398315
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,256,768,0.005277866621812185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,16384,0.01691306630770365
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,12288,0.005793066819508871
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,12288,0.016448000073432924
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,12288,0.018658133347829182
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,10240,0.005904000004132589
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,65536,0.08292160034179688
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,10240,0.0169322669506073
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,8192,0.0060127998391787205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,8192,0.01667840083440145
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,7168,0.005684266487757364
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,7168,0.016939733425776163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,7168,0.012676266829172769
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,6144,0.005539200206597646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,6144,0.016637866695721946
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,6144,0.011506133278210958
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,5120,0.005973333120346069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,5120,0.01725226640701294
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,5120,0.010257066289583842
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,4096,0.005679999788602194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,4096,0.01649066706498464
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,3584,0.005977599819501241
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,3584,0.016249600052833556
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,3072,0.005862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,3584,0.008593066533406576
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,16384,0.02362133264541626
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,8192,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,3072,0.015780267119407655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,2560,0.005716266731421152
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,2560,0.015544533729553223
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,2560,0.007527466615041096
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,2048,0.005220266679922739
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,2048,0.015452800194422403
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,10240,0.01641386648019155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,1536,0.0045056000351905824
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,1536,0.015174399813016257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,4096,0.00907306671142578
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,1536,0.006159999966621399
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,1024,0.003769599894682566
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,1024,0.014786133170127868
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,768,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,768,0.014867200454076131
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,768,0.0051242664456367494
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,512,0.0032970666885375976
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,512,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,256,0.0029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,2048,0.006667733192443848
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,256,0.014260266224543253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,1024,0.005539200206597646
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,256,0.004689066608746847
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,128,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,3072,0.007897600034872691
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,128,0.01420906682809194
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,64,0.002701866626739502
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,64,0.014154666662216186
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,128,32,0.002586666742960612
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,128,32,0.014141866564750671
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,65536,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,65536,0.017655466000239053
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,16384,0.005635199944178263
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,16384,0.01670080025990804
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,12288,0.005576533575852713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,12288,0.016552533706029257
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,512,0.00499839981396993
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,10240,0.005941333373387655
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,10240,0.016925867398579916
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,8192,0.005717333157857259
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8_block,1,128,128,0.004456533491611481
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,8192,0.016452266772588094
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,7168,0.005578666428724925
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,6144,0.005644799768924713
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,7168,0.017170133193333943
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,6144,0.016633599996566772
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,5120,0.005898666878541311
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,5120,0.016973867019017538
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,4096,0.005406933526198069
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,4096,0.016456533471743265
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,3584,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,3584,0.016545066237449647
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,3072,0.005785599847634633
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,3072,0.015763200322786965
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,2560,0.005619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,2560,0.015575466553370157
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,2048,0.0050016000866889955
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,2048,0.015149866541226705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,1536,0.004436266422271728
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,1536,0.015149866541226705
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,1024,0.003874133278926214
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,1024,0.014844800035158793
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,768,0.00346666673819224
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,768,0.014600533246994018
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,512,0.003235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,512,0.01458346645037333
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,256,0.0029237332443396253
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,256,0.014636799693107605
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,128,0.0027402666707833606
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,65536,0.0180074671904246
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,16384,0.005829333265622457
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,128,0.014135467012723288
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,64,0.002611200014750163
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,64,0.0141567995150884
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,64,32,0.002643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,64,32,0.0141184002161026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,65536,0.007913599908351897
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,16384,0.017118932803471883
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,12288,0.005726933479309082
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,7168,0.0057087997595469155
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,12288,0.016747732957204185
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,10240,0.005743999779224396
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,6144,0.005593599875768026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,10240,0.016938666502634682
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,8192,0.005568000177542368
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,8192,0.016666666666666666
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,7168,0.017177599668502807
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,6144,0.016617600123087564
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,5120,0.005782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,5120,0.01736746629079183
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,4096,0.005500799914201101
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,4096,0.01660266617933909
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,3584,0.005965866645177205
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,3584,0.016122666994730632
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,3072,0.005749333401521047
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,3072,0.015742933750152587
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,2560,0.0057781333724657696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,2048,0.0051584000388781226
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,2560,0.015589333573977151
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,2048,0.015245866775512696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,1536,0.004407466451327006
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,1536,0.014967466394106546
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,1024,0.0038805333276589714
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,1024,0.014713600277900696
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,768,0.003626666714747747
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,768,0.014470400412877402
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,512,0.0031818665564060213
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,512,0.014291200041770934
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,256,0.00290133332212766
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,256,0.014113066593805948
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,128,0.0028522667785485585
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,128,0.013989333311716715
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,64,0.0026549334327379864
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,64,0.0141184002161026
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,float16,1,32,32,0.002525866776704788
TRTLLM,1.2.0rc5,NVIDIA H200,gemm,torch_flow,fp8,1,32,32,0.013940266768137612
